diff options
-rw-r--r-- | src/main.c | 978 | ||||
-rw-r--r-- | src/parser.c | 1340 | ||||
-rw-r--r-- | src/parser.h | 18 |
3 files changed, 912 insertions, 1424 deletions
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | #include "badlib.h" | 5 | #include "badlib.h" |
6 | #include "lexer.c" | 6 | #include "lexer.c" |
7 | #include "parser.c" | ||
7 | 8 | ||
8 | typedef enum ExecMode { | 9 | typedef enum ExecMode { |
9 | RUN_NORMAL, | 10 | RUN_NORMAL, |
@@ -21,983 +22,6 @@ init(void) { | |||
21 | } | 22 | } |
22 | 23 | ||
23 | void | 24 | void |
24 | print_token(Token tok) { | ||
25 | println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); | ||
26 | } | ||
27 | |||
28 | void | ||
29 | print_tokens(Str path, Token *tokens) { | ||
30 | for (sz i = 0; i < array_size(tokens); i++) { | ||
31 | Token tok = tokens[i]; | ||
32 | print("%s:", path); | ||
33 | print_token(tok); | ||
34 | } | ||
35 | } | ||
36 | |||
37 | // | ||
38 | // Nodes | ||
39 | // | ||
40 | |||
41 | typedef enum NodeKind { | ||
42 | // Arithmetic. | ||
43 | NODE_ADD, | ||
44 | NODE_SUB, | ||
45 | NODE_DIV, | ||
46 | NODE_MUL, | ||
47 | NODE_MOD, | ||
48 | // Logical. | ||
49 | NODE_NOT, | ||
50 | NODE_AND, | ||
51 | NODE_OR, | ||
52 | NODE_EQ, | ||
53 | NODE_NEQ, | ||
54 | NODE_LT, | ||
55 | NODE_GT, | ||
56 | NODE_LE, | ||
57 | NODE_GE, | ||
58 | // Bitwise ops. | ||
59 | NODE_BITNOT, | ||
60 | NODE_BITAND, | ||
61 | NODE_BITOR, | ||
62 | NODE_BITLSHIFT, | ||
63 | NODE_BITRSHIFT, | ||
64 | // Literals. | ||
65 | NODE_STRING, | ||
66 | NODE_SYMBOL, | ||
67 | NODE_NUM_INT, | ||
68 | NODE_NUM_UINT, | ||
69 | NODE_NUM_FLOAT, | ||
70 | NODE_TRUE, | ||
71 | NODE_FALSE, | ||
72 | NODE_NIL, | ||
73 | NODE_STRUCT_LIT, | ||
74 | // Keywords. | ||
75 | NODE_LET, | ||
76 | NODE_SET, | ||
77 | NODE_STRUCT, | ||
78 | NODE_IF, | ||
79 | NODE_MATCH, | ||
80 | NODE_CASE, | ||
81 | NODE_COND, | ||
82 | NODE_ENUM, | ||
83 | NODE_BREAK, | ||
84 | NODE_CONTINUE, | ||
85 | NODE_WHILE, | ||
86 | // Helpers. | ||
87 | NODE_SYMBOL_IDX, | ||
88 | NODE_TYPE, | ||
89 | NODE_ARR_TYPE, | ||
90 | NODE_COMPOUND_TYPE, | ||
91 | NODE_VAL_FIELD, | ||
92 | NODE_BLOCK, | ||
93 | } NodeKind; | ||
94 | |||
95 | Str node_str[] = { | ||
96 | // Arithmetic. | ||
97 | [NODE_ADD] = cstr("ADD"), | ||
98 | [NODE_SUB] = cstr("SUB"), | ||
99 | [NODE_DIV] = cstr("DIV"), | ||
100 | [NODE_MUL] = cstr("MUL"), | ||
101 | [NODE_MOD] = cstr("MOD"), | ||
102 | // Logical. | ||
103 | [NODE_NOT] = cstr("NOT"), | ||
104 | [NODE_AND] = cstr("AND"), | ||
105 | [NODE_OR] = cstr("OR"), | ||
106 | [NODE_EQ] = cstr("EQ"), | ||
107 | [NODE_NEQ] = cstr("NEQ"), | ||
108 | [NODE_LT] = cstr("LT"), | ||
109 | [NODE_GT] = cstr("GT"), | ||
110 | [NODE_LE] = cstr("LE"), | ||
111 | [NODE_GE] = cstr("GE"), | ||
112 | // Bitwise ops. | ||
113 | [NODE_BITNOT] = cstr("BITNOT"), | ||
114 | [NODE_BITAND] = cstr("BITAND"), | ||
115 | [NODE_BITOR] = cstr("BITOR"), | ||
116 | [NODE_BITLSHIFT] = cstr("BITLSHIFT"), | ||
117 | [NODE_BITRSHIFT] = cstr("BITRSHIFT"), | ||
118 | // Literals. | ||
119 | [NODE_STRING] = cstr("STRING"), | ||
120 | [NODE_SYMBOL] = cstr("SYMBOL"), | ||
121 | [NODE_NUM_INT] = cstr("INT"), | ||
122 | [NODE_NUM_UINT] = cstr("UINT"), | ||
123 | [NODE_NUM_FLOAT] = cstr("FLOAT"), | ||
124 | [NODE_TRUE] = cstr("TRUE"), | ||
125 | [NODE_FALSE] = cstr("FALSE"), | ||
126 | [NODE_NIL] = cstr("NIL"), | ||
127 | [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), | ||
128 | // Keywords. | ||
129 | [NODE_LET] = cstr("LET"), | ||
130 | [NODE_SET] = cstr("SET"), | ||
131 | [NODE_STRUCT] = cstr("STRUCT DEF"), | ||
132 | [NODE_IF] = cstr("IF"), | ||
133 | [NODE_MATCH] = cstr("MATCH"), | ||
134 | [NODE_CASE] = cstr("CASE"), | ||
135 | [NODE_COND] = cstr("COND"), | ||
136 | [NODE_ENUM] = cstr("ENUM"), | ||
137 | [NODE_BREAK] = cstr("BREAK"), | ||
138 | [NODE_CONTINUE] = cstr("CONTINUE"), | ||
139 | [NODE_WHILE] = cstr("WHILE"), | ||
140 | // Helpers. | ||
141 | [NODE_TYPE] = cstr("TYPE"), | ||
142 | [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), | ||
143 | [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), | ||
144 | [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), | ||
145 | [NODE_VAL_FIELD] = cstr("FIELD"), | ||
146 | [NODE_BLOCK] = cstr("BLOCK"), | ||
147 | }; | ||
148 | |||
149 | typedef struct Node { | ||
150 | sz id; | ||
151 | sz line; | ||
152 | sz col; | ||
153 | |||
154 | NodeKind kind; | ||
155 | union { | ||
156 | f64 d; | ||
157 | sz i; | ||
158 | u64 u; | ||
159 | Str str; | ||
160 | Str sym; | ||
161 | } value; | ||
162 | union { | ||
163 | struct { | ||
164 | struct Node *left; | ||
165 | struct Node *right; | ||
166 | }; | ||
167 | struct { | ||
168 | struct Node *next; | ||
169 | struct Node *arr_size; | ||
170 | }; | ||
171 | struct { | ||
172 | struct Node *var_name; | ||
173 | struct Node *var_type; | ||
174 | struct Node *var_val; | ||
175 | }; | ||
176 | struct { | ||
177 | struct Node *while_cond; | ||
178 | struct Node *while_expr; | ||
179 | }; | ||
180 | struct { | ||
181 | struct Node *cond_if; | ||
182 | struct Node *cond_expr; | ||
183 | struct Node *cond_else; | ||
184 | }; | ||
185 | struct { | ||
186 | struct Node *field_name; | ||
187 | struct Node *field_type; | ||
188 | struct Node *field_val; | ||
189 | }; | ||
190 | struct { | ||
191 | struct Node *match_expr; | ||
192 | struct Node **match_cases; | ||
193 | }; | ||
194 | struct { | ||
195 | struct Node *case_value; | ||
196 | struct Node *case_expr; | ||
197 | }; | ||
198 | struct Node **struct_field; | ||
199 | struct Node **elements; | ||
200 | struct Node **statements; | ||
201 | }; | ||
202 | bool is_ptr; | ||
203 | } Node; | ||
204 | |||
205 | // | ||
206 | // Parsing. | ||
207 | // | ||
208 | |||
209 | typedef struct Parser { | ||
210 | Token current; | ||
211 | Token previous; | ||
212 | Token *tokens; | ||
213 | sz idx; | ||
214 | |||
215 | // Error handling. | ||
216 | bool err; | ||
217 | bool panic; | ||
218 | Str file_name; | ||
219 | |||
220 | // Storage. | ||
221 | Node **nodes; | ||
222 | Arena *storage; | ||
223 | } Parser; | ||
224 | |||
225 | typedef enum { | ||
226 | PREC_NONE = 0, | ||
227 | PREC_LOW, // lowest precedence | ||
228 | PREC_BITLOGIC, // & | | ||
229 | PREC_BITSHIFT, // << >> | ||
230 | PREC_OR, // || | ||
231 | PREC_AND, // && | ||
232 | PREC_EQUALITY, // == != | ||
233 | PREC_COMPARISON, // < > <= >= | ||
234 | PREC_TERM, // + - | ||
235 | PREC_FACTOR, // * / % | ||
236 | PREC_UNARY, // ! - | ||
237 | PREC_CALL, // . () | ||
238 | PREC_PRIMARY // highest precedence | ||
239 | } ParsePrecedence; | ||
240 | |||
241 | typedef void (*ParseFn)(Parser *); | ||
242 | |||
243 | typedef struct { | ||
244 | ParseFn prefix; | ||
245 | ParseFn infix; | ||
246 | ParsePrecedence precedence; | ||
247 | } ParseRule; | ||
248 | |||
249 | void parse_expr(Parser *parser, ParsePrecedence precedence); | ||
250 | void parse_advance(Parser *parser); | ||
251 | bool parse_match(Parser *parser, TokenKind kind); | ||
252 | |||
253 | void parse_grouping(Parser *parser); | ||
254 | void parse_unary(Parser *parser); | ||
255 | void parse_binary(Parser *parser); | ||
256 | void parse_number(Parser *parser); | ||
257 | void parse_literal(Parser *parser); | ||
258 | void parse_string(Parser *parser); | ||
259 | void parse_symbol(Parser *parser); | ||
260 | void parse_keyword(Parser *parser); | ||
261 | void parse_type(Parser *parser); | ||
262 | void parse_block(Parser *parser); | ||
263 | |||
264 | ParseRule parse_rules[] = { | ||
265 | [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, | ||
266 | [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, | ||
267 | [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, | ||
268 | |||
269 | // Arithmetic. | ||
270 | [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, | ||
271 | [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, | ||
272 | [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, | ||
273 | [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, | ||
274 | [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, | ||
275 | |||
276 | // Logical. | ||
277 | [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, | ||
278 | [TOK_AND] = {NULL, parse_binary, PREC_AND}, | ||
279 | [TOK_OR] = {NULL, parse_binary, PREC_OR}, | ||
280 | [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
281 | [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
282 | [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
283 | [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
284 | [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
285 | [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
286 | |||
287 | // Bitwise ops. | ||
288 | [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, | ||
289 | [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
290 | [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
291 | [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
292 | [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
293 | |||
294 | // Literals. | ||
295 | [TOK_STRING] = {parse_string, NULL, PREC_NONE}, | ||
296 | [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, | ||
297 | [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, | ||
298 | [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, | ||
299 | [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, | ||
300 | [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, | ||
301 | [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, | ||
302 | [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, | ||
303 | |||
304 | // Statements. | ||
305 | [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, | ||
306 | [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, | ||
307 | [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, | ||
308 | [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, | ||
309 | [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, | ||
310 | [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, | ||
311 | [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, | ||
312 | [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, | ||
313 | [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, | ||
314 | [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, | ||
315 | [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, | ||
316 | [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, | ||
317 | |||
318 | [TOK_EOF] = {NULL, NULL, PREC_NONE}, | ||
319 | }; | ||
320 | |||
321 | Node * | ||
322 | node_alloc(Parser *parser, NodeKind kind, Token tok) { | ||
323 | if (parser->panic) return NULL; | ||
324 | static sz id = 0; | ||
325 | Node *node = arena_calloc((sz)sizeof(Node), parser->storage); | ||
326 | node->id = id++; | ||
327 | node->kind = kind; | ||
328 | node->line = tok.line; | ||
329 | node->col = tok.col; | ||
330 | return node; | ||
331 | } | ||
332 | |||
333 | void | ||
334 | parse_emit_err(Parser *parser, Token token, Str msg) { | ||
335 | if (parser->panic) return; | ||
336 | parser->panic = true; | ||
337 | parser->err = true; | ||
338 | eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, | ||
339 | msg); | ||
340 | |||
341 | if (token.kind == TOK_EOF) { | ||
342 | eprintln(" at end of the file"); | ||
343 | } else if (token.kind != TOK_UNKNOWN) { | ||
344 | eprintln(" at %s", token.val); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | void | ||
349 | parse_advance(Parser *parser) { | ||
350 | assert(parser); | ||
351 | parser->previous = parser->current; | ||
352 | if (parser->previous.kind == TOK_EOF) { | ||
353 | return; | ||
354 | } | ||
355 | parser->current = parser->tokens[parser->idx++]; | ||
356 | } | ||
357 | |||
358 | bool | ||
359 | parse_match(Parser *parser, TokenKind kind) { | ||
360 | assert(parser); | ||
361 | if (parser->current.kind != kind) { | ||
362 | return false; | ||
363 | } | ||
364 | parse_advance(parser); | ||
365 | return true; | ||
366 | } | ||
367 | |||
368 | static void | ||
369 | parse_consume(Parser *parser, TokenKind kind, Str msg) { | ||
370 | if (parser->current.kind == kind) { | ||
371 | parse_advance(parser); | ||
372 | return; | ||
373 | } | ||
374 | parse_emit_err(parser, parser->current, msg); | ||
375 | } | ||
376 | |||
377 | void | ||
378 | parse_block(Parser *parser) { | ||
379 | #if DEBUG == 1 | ||
380 | print("parsing block "); | ||
381 | print_token(parser->previous); | ||
382 | #endif | ||
383 | Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); | ||
384 | if (!block) return; | ||
385 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
386 | parse_expr(parser, PREC_LOW); | ||
387 | Node *next = array_pop(parser->nodes); | ||
388 | array_push(block->statements, next, parser->storage); | ||
389 | } | ||
390 | array_push(parser->nodes, block, parser->storage); | ||
391 | } | ||
392 | |||
393 | void | ||
394 | parse_expr(Parser *parser, ParsePrecedence precedence) { | ||
395 | parse_advance(parser); | ||
396 | // TODO: synchronize panic mode on keywords. | ||
397 | if (parser->panic) return; | ||
398 | ParseFn prefix = parse_rules[parser->previous.kind].prefix; | ||
399 | if (prefix == NULL) { | ||
400 | parse_emit_err(parser, parser->previous, cstr("expected expression")); | ||
401 | return; | ||
402 | } | ||
403 | prefix(parser); | ||
404 | |||
405 | while (precedence <= parse_rules[parser->current.kind].precedence) { | ||
406 | parse_advance(parser); | ||
407 | ParseFn infix = parse_rules[parser->previous.kind].infix; | ||
408 | if (infix == NULL) { | ||
409 | parse_emit_err(parser, parser->previous, | ||
410 | cstr("expected expression")); | ||
411 | return; | ||
412 | } | ||
413 | infix(parser); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | void | ||
418 | parse_unary(Parser *parser) { | ||
419 | if (parser->panic) return; | ||
420 | Token prev = parser->previous; | ||
421 | #if DEBUG == 1 | ||
422 | print("parsing unary "); | ||
423 | print_token(prev); | ||
424 | #endif | ||
425 | parse_expr(parser, PREC_UNARY); | ||
426 | Node *node = NULL; | ||
427 | switch (prev.kind) { | ||
428 | case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; | ||
429 | case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; | ||
430 | default: break; // Unreachable. | ||
431 | } | ||
432 | if (!node) return; | ||
433 | node->left = array_pop(parser->nodes); | ||
434 | array_push(parser->nodes, node, parser->storage); | ||
435 | } | ||
436 | |||
437 | void | ||
438 | parse_literal(Parser *parser) { | ||
439 | if (parser->panic) return; | ||
440 | Token prev = parser->previous; | ||
441 | #if DEBUG == 1 | ||
442 | print("parsing literal "); | ||
443 | print_token(prev); | ||
444 | #endif | ||
445 | Node *node = NULL; | ||
446 | switch (prev.kind) { | ||
447 | case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; | ||
448 | case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; | ||
449 | case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; | ||
450 | default: return; // Unreachable. | ||
451 | } | ||
452 | if (!node) return; | ||
453 | array_push(parser->nodes, node, parser->storage); | ||
454 | } | ||
455 | |||
456 | void | ||
457 | parse_type(Parser *parser) { | ||
458 | Token prev = parser->previous; | ||
459 | #if DEBUG == 1 | ||
460 | print("parsing type "); | ||
461 | print_token(prev); | ||
462 | #endif | ||
463 | Node *node = node_alloc(parser, NODE_TYPE, prev); | ||
464 | if (!node) return; | ||
465 | if (parse_match(parser, TOK_AT)) { | ||
466 | node->is_ptr = true; | ||
467 | } | ||
468 | if (parse_match(parser, TOK_LCURLY)) { | ||
469 | node->kind = NODE_COMPOUND_TYPE; | ||
470 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
471 | parse_consume(parser, TOK_SYMBOL, cstr("expected name")); | ||
472 | parse_symbol(parser); | ||
473 | Node *next = array_pop(parser->nodes); | ||
474 | parse_consume(parser, TOK_COLON, cstr("incomplete type")); | ||
475 | parse_type(parser); | ||
476 | next->next = array_pop(parser->nodes); | ||
477 | array_push(node->elements, next, parser->storage); | ||
478 | } | ||
479 | } else { | ||
480 | parse_consume(parser, TOK_SYMBOL, | ||
481 | cstr("no type given for struct field")); | ||
482 | node->value.sym = parser->previous.val; | ||
483 | // Optional array value? | ||
484 | if (parse_match(parser, TOK_LSQUARE)) { | ||
485 | node->kind = NODE_ARR_TYPE, | ||
486 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); | ||
487 | parse_number(parser); | ||
488 | node->arr_size = array_pop(parser->nodes); | ||
489 | parse_consume(parser, TOK_RSQUARE, | ||
490 | cstr("unmatched brackets ']' in array type")); | ||
491 | } | ||
492 | } | ||
493 | array_push(parser->nodes, node, parser->storage); | ||
494 | } | ||
495 | |||
496 | void | ||
497 | parse_keyword(Parser *parser) { | ||
498 | if (parser->panic) return; | ||
499 | Token prev = parser->previous; | ||
500 | #if DEBUG == 1 | ||
501 | print("parsing keyword "); | ||
502 | print_token(prev); | ||
503 | #endif | ||
504 | Node *node = NULL; | ||
505 | switch (prev.kind) { | ||
506 | case TOK_LET: { | ||
507 | node = node_alloc(parser, NODE_LET, prev); | ||
508 | if (!node) return; | ||
509 | parse_consume(parser, TOK_SYMBOL, | ||
510 | cstr("expected symbol name on let expression")); | ||
511 | parse_symbol(parser); | ||
512 | node->var_name = array_pop(parser->nodes); | ||
513 | if (node->var_name->next) { | ||
514 | parse_emit_err(parser, prev, | ||
515 | cstr("invalid symbol name in let expression")); | ||
516 | return; | ||
517 | } | ||
518 | |||
519 | // Optional type declaration. | ||
520 | if (parse_match(parser, TOK_COLON)) { | ||
521 | parse_type(parser); | ||
522 | node->var_type = array_pop(parser->nodes); | ||
523 | } | ||
524 | |||
525 | // Optional assignment. | ||
526 | if (parse_match(parser, TOK_ASSIGN)) { | ||
527 | parse_expr(parser, PREC_LOW); | ||
528 | node->var_val = array_pop(parser->nodes); | ||
529 | } | ||
530 | } break; | ||
531 | case TOK_SET: { | ||
532 | node = node_alloc(parser, NODE_SET, prev); | ||
533 | if (!node) return; | ||
534 | parse_consume(parser, TOK_SYMBOL, | ||
535 | cstr("expected symbol name on let expression")); | ||
536 | parse_symbol(parser); | ||
537 | node->var_name = array_pop(parser->nodes); | ||
538 | parse_consume(parser, TOK_ASSIGN, | ||
539 | cstr("expected assignment on set expression")); | ||
540 | parse_expr(parser, PREC_LOW); | ||
541 | node->var_val = array_pop(parser->nodes); | ||
542 | } break; | ||
543 | case TOK_STRUCT: { | ||
544 | node = node_alloc(parser, NODE_STRUCT, prev); | ||
545 | if (!node) return; | ||
546 | parse_consume(parser, TOK_SYMBOL, | ||
547 | cstr("expected symbol name on struct definition")); | ||
548 | // Just consume this to avoid conflicts with struct literals. | ||
549 | node->value.sym = parser->previous.val; | ||
550 | |||
551 | parse_consume(parser, TOK_LCURLY, | ||
552 | cstr("expected '{' on struct definition")); | ||
553 | |||
554 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
555 | Node *field = | ||
556 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
557 | if (!field) return; | ||
558 | parse_consume( | ||
559 | parser, TOK_SYMBOL, | ||
560 | cstr("expected symbol name on struct definition")); | ||
561 | parse_symbol(parser); | ||
562 | field->field_name = array_pop(parser->nodes); | ||
563 | if (field->field_name->next || | ||
564 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
565 | parse_emit_err(parser, prev, | ||
566 | cstr("invalid symbol name in struct field")); | ||
567 | return; | ||
568 | } | ||
569 | parse_consume( | ||
570 | parser, TOK_COLON, | ||
571 | cstr("expected symbol name on struct definition")); | ||
572 | parse_type(parser); | ||
573 | field->field_type = array_pop(parser->nodes); | ||
574 | |||
575 | // Optional assignment. | ||
576 | if (parse_match(parser, TOK_ASSIGN)) { | ||
577 | parse_expr(parser, PREC_LOW); | ||
578 | field->field_val = array_pop(parser->nodes); | ||
579 | } | ||
580 | |||
581 | array_push(node->struct_field, field, parser->storage); | ||
582 | } | ||
583 | } break; | ||
584 | case TOK_IF: { | ||
585 | node = node_alloc(parser, NODE_IF, prev); | ||
586 | if (!node) return; | ||
587 | parse_consume(parser, TOK_LPAREN, | ||
588 | cstr("expected '(' on if expression")); | ||
589 | parse_expr(parser, PREC_LOW); | ||
590 | node->cond_if = array_pop(parser->nodes); | ||
591 | parse_consume(parser, TOK_RPAREN, | ||
592 | cstr("expected ')' on if expression")); | ||
593 | parse_expr(parser, PREC_LOW); | ||
594 | node->cond_expr = array_pop(parser->nodes); | ||
595 | if (parse_match(parser, TOK_ELSE)) { | ||
596 | parse_expr(parser, PREC_LOW); | ||
597 | node->cond_else = array_pop(parser->nodes); | ||
598 | } | ||
599 | } break; | ||
600 | case TOK_MATCH: { | ||
601 | node = node_alloc(parser, NODE_MATCH, prev); | ||
602 | if (!node) return; | ||
603 | parse_consume(parser, TOK_LPAREN, | ||
604 | cstr("expected '(' on if expression")); | ||
605 | parse_expr(parser, PREC_LOW); | ||
606 | node->match_expr = array_pop(parser->nodes); | ||
607 | parse_consume(parser, TOK_RPAREN, | ||
608 | cstr("expected ')' on if expression")); | ||
609 | parse_consume(parser, TOK_LCURLY, | ||
610 | cstr("expected block of match cases")); | ||
611 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
612 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
613 | if (!tmp) return; | ||
614 | // Are we on the default case. | ||
615 | if (!parse_match(parser, TOK_ELSE)) { | ||
616 | parse_consume(parser, TOK_CASE, | ||
617 | cstr("expected case statement")); | ||
618 | parse_expr(parser, PREC_LOW); | ||
619 | tmp->case_value = array_pop(parser->nodes); | ||
620 | } | ||
621 | parse_consume(parser, TOK_ASSIGN, | ||
622 | cstr("malformed case statement")); | ||
623 | parse_expr(parser, PREC_LOW); | ||
624 | tmp->case_expr = array_pop(parser->nodes); | ||
625 | array_push(node->match_cases, tmp, parser->storage); | ||
626 | } | ||
627 | // TODO: Check that we only have literals on the match case, this | ||
628 | // could be done on the analysis step, but also here... | ||
629 | // TODO: Check that there are no multiple default or duplicated | ||
630 | // cases. | ||
631 | } break; | ||
632 | case TOK_ENUM: { | ||
633 | node = node_alloc(parser, NODE_ENUM, prev); | ||
634 | if (!node) return; | ||
635 | parse_consume(parser, TOK_SYMBOL, | ||
636 | cstr("expected symbol name on enum definition")); | ||
637 | // Just consume this to avoid conflicts with struct literals. | ||
638 | node->value.sym = parser->previous.val; | ||
639 | parse_consume(parser, TOK_LCURLY, | ||
640 | cstr("expected '{' on enum definition")); | ||
641 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
642 | Node *field = | ||
643 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
644 | if (!field) return; | ||
645 | parse_consume(parser, TOK_SYMBOL, | ||
646 | cstr("expected symbol name on enum definition")); | ||
647 | parse_symbol(parser); | ||
648 | field->field_name = array_pop(parser->nodes); | ||
649 | if (field->field_name->next || | ||
650 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
651 | parse_emit_err(parser, prev, | ||
652 | cstr("invalid symbol name in enum field")); | ||
653 | return; | ||
654 | } | ||
655 | if (parse_match(parser, TOK_ASSIGN)) { | ||
656 | parse_expr(parser, PREC_LOW); | ||
657 | field->field_val = array_pop(parser->nodes); | ||
658 | } | ||
659 | array_push(node->struct_field, field, parser->storage); | ||
660 | } | ||
661 | } break; | ||
662 | case TOK_COND: { | ||
663 | node = node_alloc(parser, NODE_COND, prev); | ||
664 | if (!node) return; | ||
665 | parse_consume(parser, TOK_LCURLY, | ||
666 | cstr("expected '{' on cond expression")); | ||
667 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
668 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
669 | if (!tmp) return; | ||
670 | // Are we on the default case. | ||
671 | if (!parse_match(parser, TOK_ELSE)) { | ||
672 | parse_consume(parser, TOK_CASE, | ||
673 | cstr("expected case statement")); | ||
674 | parse_expr(parser, PREC_LOW); | ||
675 | tmp->case_value = array_pop(parser->nodes); | ||
676 | } | ||
677 | parse_consume(parser, TOK_ASSIGN, | ||
678 | cstr("malformed case statement")); | ||
679 | parse_expr(parser, PREC_LOW); | ||
680 | tmp->case_expr = array_pop(parser->nodes); | ||
681 | array_push(node->match_cases, tmp, parser->storage); | ||
682 | } | ||
683 | } break; | ||
684 | case TOK_BREAK: { | ||
685 | node = node_alloc(parser, NODE_BREAK, prev); | ||
686 | if (!node) return; | ||
687 | } break; | ||
688 | case TOK_CONTINUE: { | ||
689 | node = node_alloc(parser, NODE_CONTINUE, prev); | ||
690 | if (!node) return; | ||
691 | } break; | ||
692 | case TOK_WHILE: { | ||
693 | node = node_alloc(parser, NODE_WHILE, prev); | ||
694 | if (!node) return; | ||
695 | parse_consume(parser, TOK_LPAREN, | ||
696 | cstr("expected '(' on while expression")); | ||
697 | parse_expr(parser, PREC_LOW); | ||
698 | node->while_cond = array_pop(parser->nodes); | ||
699 | parse_consume(parser, TOK_RPAREN, | ||
700 | cstr("expected ')' on while expression")); | ||
701 | parse_expr(parser, PREC_LOW); | ||
702 | node->while_expr = array_pop(parser->nodes); | ||
703 | } break; | ||
704 | default: return; // Unreachable. | ||
705 | } | ||
706 | array_push(parser->nodes, node, parser->storage); | ||
707 | } | ||
708 | |||
709 | void | ||
710 | parse_binary(Parser *parser) { | ||
711 | if (parser->panic) return; | ||
712 | Token prev = parser->previous; | ||
713 | #if DEBUG == 1 | ||
714 | print("parsing binary "); | ||
715 | print_token(prev); | ||
716 | #endif | ||
717 | ParseRule rule = parse_rules[prev.kind]; | ||
718 | parse_expr(parser, rule.precedence + 1); | ||
719 | |||
720 | Node *node; | ||
721 | switch (prev.kind) { | ||
722 | case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; | ||
723 | case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; | ||
724 | case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; | ||
725 | case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; | ||
726 | case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; | ||
727 | case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; | ||
728 | case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; | ||
729 | case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; | ||
730 | case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; | ||
731 | case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; | ||
732 | case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; | ||
733 | case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; | ||
734 | case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; | ||
735 | case TOK_BITAND: { | ||
736 | node = node_alloc(parser, NODE_BITAND, prev); | ||
737 | } break; | ||
738 | case TOK_BITOR: { | ||
739 | node = node_alloc(parser, NODE_BITOR, prev); | ||
740 | } break; | ||
741 | case TOK_BITLSHIFT: { | ||
742 | node = node_alloc(parser, NODE_BITLSHIFT, prev); | ||
743 | } break; | ||
744 | case TOK_BITRSHIFT: { | ||
745 | node = node_alloc(parser, NODE_BITRSHIFT, prev); | ||
746 | } break; | ||
747 | default: { | ||
748 | parse_emit_err(parser, prev, cstr("unreachable")); | ||
749 | return; | ||
750 | } | ||
751 | } | ||
752 | if (!node) return; | ||
753 | node->right = array_pop(parser->nodes); | ||
754 | node->left = array_pop(parser->nodes); | ||
755 | array_push(parser->nodes, node, parser->storage); | ||
756 | } | ||
757 | |||
758 | void | ||
759 | parse_number(Parser *parser) { | ||
760 | if (parser->panic) return; | ||
761 | Token prev = parser->previous; | ||
762 | #if DEBUG == 1 | ||
763 | print("parsing number "); | ||
764 | print_token(prev); | ||
765 | #endif | ||
766 | Node *node = NULL; | ||
767 | switch (prev.kind) { | ||
768 | case TOK_NUM_INT: { | ||
769 | if (str_has_prefix(prev.val, cstr("0x")) || | ||
770 | str_has_prefix(prev.val, cstr("0b"))) { | ||
771 | node = node_alloc(parser, NODE_NUM_UINT, prev); | ||
772 | if (!node) return; | ||
773 | node->value.u = str_to_uint(prev.val); | ||
774 | } else { | ||
775 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
776 | if (!node) return; | ||
777 | node->value.i = str_to_int(prev.val); | ||
778 | } | ||
779 | } break; | ||
780 | case TOK_NUM_FLOAT: { | ||
781 | node = node_alloc(parser, NODE_NUM_FLOAT, prev); | ||
782 | if (!node) return; | ||
783 | node->value.d = str_to_float(prev.val); | ||
784 | } break; | ||
785 | case TOK_CHAR: { | ||
786 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
787 | if (!node) return; | ||
788 | node->value.i = prev.val.mem[1]; | ||
789 | } break; | ||
790 | default: break; | ||
791 | } | ||
792 | array_push(parser->nodes, node, parser->storage); | ||
793 | } | ||
794 | |||
795 | void | ||
796 | parse_string(Parser *parser) { | ||
797 | if (parser->panic) return; | ||
798 | Token prev = parser->previous; | ||
799 | #if DEBUG == 1 | ||
800 | print("parsing string "); | ||
801 | print_token(prev); | ||
802 | #endif | ||
803 | Node *node = node_alloc(parser, NODE_STRING, prev); | ||
804 | if (!node) return; | ||
805 | node->value.str = str_remove_prefix(prev.val, cstr("\"")); | ||
806 | node->value.str = str_remove_suffix(node->value.str, cstr("\"")); | ||
807 | array_push(parser->nodes, node, parser->storage); | ||
808 | } | ||
809 | |||
810 | void | ||
811 | parse_symbol(Parser *parser) { | ||
812 | if (parser->panic) return; | ||
813 | Token prev = parser->previous; | ||
814 | #if DEBUG == 1 | ||
815 | print("parsing symbol "); | ||
816 | print_token(prev); | ||
817 | #endif | ||
818 | if (prev.kind == TOK_AT) { | ||
819 | parse_consume(parser, TOK_SYMBOL, | ||
820 | cstr("expected symbol after '.' operator")); | ||
821 | parse_symbol(parser); | ||
822 | Node *node = array_pop(parser->nodes); | ||
823 | if (node) { | ||
824 | node->is_ptr = true; | ||
825 | array_push(parser->nodes, node, parser->storage); | ||
826 | } | ||
827 | return; | ||
828 | } | ||
829 | Node *node = node_alloc(parser, NODE_SYMBOL, prev); | ||
830 | if (!node) return; | ||
831 | if (parse_match(parser, TOK_DOT)) { | ||
832 | // Symbol chain. | ||
833 | parse_consume(parser, TOK_SYMBOL, | ||
834 | cstr("expected symbol after '.' operator")); | ||
835 | parse_symbol(parser); | ||
836 | node->next = array_pop(parser->nodes); | ||
837 | } else if (parse_match(parser, TOK_LCURLY)) { | ||
838 | // Struct literal. | ||
839 | node->kind = NODE_STRUCT_LIT; | ||
840 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
841 | Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
842 | parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); | ||
843 | parse_symbol(parser); | ||
844 | field->field_name = array_pop(parser->nodes); | ||
845 | parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); | ||
846 | parse_expr(parser, PREC_LOW); | ||
847 | field->field_type = array_pop(parser->nodes); | ||
848 | array_push(node->elements, field, parser->storage); | ||
849 | } | ||
850 | } else if (parse_match(parser, TOK_LSQUARE)) { | ||
851 | node->kind = NODE_SYMBOL_IDX; | ||
852 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); | ||
853 | parse_number(parser); | ||
854 | node->arr_size = array_pop(parser->nodes); | ||
855 | parse_consume(parser, TOK_RSQUARE, | ||
856 | cstr("unmatched brackets ']' in array type")); | ||
857 | if (parse_match(parser, TOK_DOT)) { | ||
858 | // Symbol chain. | ||
859 | parse_consume(parser, TOK_SYMBOL, | ||
860 | cstr("expected symbol after '.' operator")); | ||
861 | parse_symbol(parser); | ||
862 | node->next = array_pop(parser->nodes); | ||
863 | } | ||
864 | } else { | ||
865 | node = node_alloc(parser, NODE_SYMBOL, prev); | ||
866 | if (!node) return; | ||
867 | } | ||
868 | node->value.sym = prev.val; | ||
869 | array_push(parser->nodes, node, parser->storage); | ||
870 | } | ||
871 | |||
872 | void | ||
873 | parse_grouping(Parser *parser) { | ||
874 | if (parser->panic) return; | ||
875 | #if DEBUG == 1 | ||
876 | print("parsing group "); | ||
877 | print_token(parser->previous); | ||
878 | #endif | ||
879 | parse_expr(parser, PREC_LOW); | ||
880 | parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); | ||
881 | } | ||
882 | |||
883 | void | ||
884 | graph_node(Node *node) { | ||
885 | if (node == NULL) return; | ||
886 | print("%d [width=2.5,shape=Mrecord,label=\"", node->id); | ||
887 | print("{ <type> %s | { L: %d | C: %d } } ", node_str[node->kind], | ||
888 | node->line, node->col); | ||
889 | switch (node->kind) { | ||
890 | case NODE_NUM_INT: print("| Value: %d", node->value.i); break; | ||
891 | case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; | ||
892 | case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; | ||
893 | case NODE_STRING: print("| Value: %s", node->value.str); break; | ||
894 | case NODE_SYMBOL_IDX: | ||
895 | case NODE_STRUCT: | ||
896 | case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; | ||
897 | case NODE_SYMBOL: | ||
898 | case NODE_TYPE: { | ||
899 | if (node->is_ptr) { | ||
900 | print("| Name: @%s", node->value.sym); | ||
901 | } else { | ||
902 | print("| Name: %s", node->value.sym); | ||
903 | } | ||
904 | } break; | ||
905 | default: break; | ||
906 | } | ||
907 | println("\"];"); | ||
908 | |||
909 | switch (node->kind) { | ||
910 | case NODE_COND: | ||
911 | case NODE_MATCH: { | ||
912 | if (node->match_expr) { | ||
913 | println("%d:e->%d:w;", node->id, node->match_expr->id); | ||
914 | graph_node(node->match_expr); | ||
915 | } | ||
916 | for (sz i = 0; i < array_size(node->match_cases); i++) { | ||
917 | Node *next = node->match_cases[i]; | ||
918 | println("%d:e->%d:w;", node->id, next->id); | ||
919 | graph_node(next); | ||
920 | } | ||
921 | } break; | ||
922 | case NODE_BLOCK: | ||
923 | case NODE_STRUCT_LIT: | ||
924 | case NODE_COMPOUND_TYPE: { | ||
925 | for (sz i = 0; i < array_size(node->elements); i++) { | ||
926 | Node *next = node->elements[i]; | ||
927 | println("%d:e->%d:w;", node->id, next->id); | ||
928 | graph_node(next); | ||
929 | } | ||
930 | } break; | ||
931 | case NODE_ENUM: | ||
932 | case NODE_STRUCT: { | ||
933 | for (sz i = 0; i < array_size(node->struct_field); i++) { | ||
934 | Node *next = node->struct_field[i]; | ||
935 | println("%d:e->%d:w;", node->id, next->id); | ||
936 | graph_node(next); | ||
937 | } | ||
938 | } break; | ||
939 | case NODE_IF: { | ||
940 | if (node->cond_if) { | ||
941 | println("%d:e->%d:w;", node->id, node->cond_if->id); | ||
942 | graph_node(node->cond_if); | ||
943 | } | ||
944 | if (node->cond_expr) { | ||
945 | println("%d:e->%d:w;", node->id, node->cond_expr->id); | ||
946 | graph_node(node->cond_expr); | ||
947 | } | ||
948 | if (node->cond_else) { | ||
949 | println("%d:e->%d:w;", node->id, node->cond_else->id); | ||
950 | graph_node(node->cond_else); | ||
951 | } | ||
952 | } break; | ||
953 | case NODE_VAL_FIELD: | ||
954 | case NODE_SET: | ||
955 | case NODE_LET: { | ||
956 | if (node->var_name) { | ||
957 | println("%d:e->%d:w;", node->id, node->var_name->id); | ||
958 | graph_node(node->var_name); | ||
959 | } | ||
960 | if (node->var_type) { | ||
961 | println("%d:e->%d:w;", node->id, node->var_type->id); | ||
962 | graph_node(node->var_type); | ||
963 | } | ||
964 | if (node->var_val) { | ||
965 | println("%d:e->%d:w;", node->id, node->var_val->id); | ||
966 | graph_node(node->var_val); | ||
967 | } | ||
968 | } break; | ||
969 | default: { | ||
970 | if (node->left) { | ||
971 | println("%d:e->%d:w;", node->id, node->left->id); | ||
972 | graph_node(node->left); | ||
973 | } | ||
974 | if (node->right) { | ||
975 | println("%d:e->%d:w;", node->id, node->right->id); | ||
976 | graph_node(node->right); | ||
977 | } | ||
978 | } break; | ||
979 | } | ||
980 | } | ||
981 | |||
982 | void | ||
983 | graph_ast(Node **roots) { | ||
984 | if (roots == NULL) return; | ||
985 | println("digraph ast {"); | ||
986 | println("rankdir=LR;"); | ||
987 | println("ranksep=\"0.95 equally\";"); | ||
988 | println("nodesep=\"0.5 equally\";"); | ||
989 | println("overlap=scale;"); | ||
990 | println("bgcolor=\"transparent\";"); | ||
991 | for (sz i = 0; i < array_size(roots); ++i) { | ||
992 | println("subgraph %d {", i); | ||
993 | Node *root = roots[array_size(roots) - 1 - i]; | ||
994 | graph_node(root); | ||
995 | println("}"); | ||
996 | } | ||
997 | println("}"); | ||
998 | } | ||
999 | |||
1000 | void | ||
1001 | process_file(Str path) { | 25 | process_file(Str path) { |
1002 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 26 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); |
1003 | 27 | ||
diff --git a/src/parser.c b/src/parser.c index 3f15b47..84f3424 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -1,488 +1,970 @@ | |||
1 | #include "parser.h" | 1 | #ifndef PARSER_C |
2 | #include "darray.h" | 2 | #define PARSER_C |
3 | |||
4 | #include "badlib.h" | ||
5 | #include "lexer.c" | ||
6 | |||
7 | // | ||
8 | // Nodes | ||
9 | // | ||
10 | |||
11 | typedef enum NodeKind { | ||
12 | // Arithmetic. | ||
13 | NODE_ADD, | ||
14 | NODE_SUB, | ||
15 | NODE_DIV, | ||
16 | NODE_MUL, | ||
17 | NODE_MOD, | ||
18 | // Logical. | ||
19 | NODE_NOT, | ||
20 | NODE_AND, | ||
21 | NODE_OR, | ||
22 | NODE_EQ, | ||
23 | NODE_NEQ, | ||
24 | NODE_LT, | ||
25 | NODE_GT, | ||
26 | NODE_LE, | ||
27 | NODE_GE, | ||
28 | // Bitwise ops. | ||
29 | NODE_BITNOT, | ||
30 | NODE_BITAND, | ||
31 | NODE_BITOR, | ||
32 | NODE_BITLSHIFT, | ||
33 | NODE_BITRSHIFT, | ||
34 | // Literals. | ||
35 | NODE_STRING, | ||
36 | NODE_SYMBOL, | ||
37 | NODE_NUM_INT, | ||
38 | NODE_NUM_UINT, | ||
39 | NODE_NUM_FLOAT, | ||
40 | NODE_TRUE, | ||
41 | NODE_FALSE, | ||
42 | NODE_NIL, | ||
43 | NODE_STRUCT_LIT, | ||
44 | // Keywords. | ||
45 | NODE_LET, | ||
46 | NODE_SET, | ||
47 | NODE_STRUCT, | ||
48 | NODE_IF, | ||
49 | NODE_MATCH, | ||
50 | NODE_CASE, | ||
51 | NODE_COND, | ||
52 | NODE_ENUM, | ||
53 | NODE_BREAK, | ||
54 | NODE_CONTINUE, | ||
55 | NODE_WHILE, | ||
56 | // Helpers. | ||
57 | NODE_SYMBOL_IDX, | ||
58 | NODE_TYPE, | ||
59 | NODE_ARR_TYPE, | ||
60 | NODE_COMPOUND_TYPE, | ||
61 | NODE_VAL_FIELD, | ||
62 | NODE_BLOCK, | ||
63 | } NodeKind; | ||
64 | |||
65 | Str node_str[] = { | ||
66 | // Arithmetic. | ||
67 | [NODE_ADD] = cstr("ADD"), | ||
68 | [NODE_SUB] = cstr("SUB"), | ||
69 | [NODE_DIV] = cstr("DIV"), | ||
70 | [NODE_MUL] = cstr("MUL"), | ||
71 | [NODE_MOD] = cstr("MOD"), | ||
72 | // Logical. | ||
73 | [NODE_NOT] = cstr("NOT"), | ||
74 | [NODE_AND] = cstr("AND"), | ||
75 | [NODE_OR] = cstr("OR"), | ||
76 | [NODE_EQ] = cstr("EQ"), | ||
77 | [NODE_NEQ] = cstr("NEQ"), | ||
78 | [NODE_LT] = cstr("LT"), | ||
79 | [NODE_GT] = cstr("GT"), | ||
80 | [NODE_LE] = cstr("LE"), | ||
81 | [NODE_GE] = cstr("GE"), | ||
82 | // Bitwise ops. | ||
83 | [NODE_BITNOT] = cstr("BITNOT"), | ||
84 | [NODE_BITAND] = cstr("BITAND"), | ||
85 | [NODE_BITOR] = cstr("BITOR"), | ||
86 | [NODE_BITLSHIFT] = cstr("BITLSHIFT"), | ||
87 | [NODE_BITRSHIFT] = cstr("BITRSHIFT"), | ||
88 | // Literals. | ||
89 | [NODE_STRING] = cstr("STRING"), | ||
90 | [NODE_SYMBOL] = cstr("SYMBOL"), | ||
91 | [NODE_NUM_INT] = cstr("INT"), | ||
92 | [NODE_NUM_UINT] = cstr("UINT"), | ||
93 | [NODE_NUM_FLOAT] = cstr("FLOAT"), | ||
94 | [NODE_TRUE] = cstr("TRUE"), | ||
95 | [NODE_FALSE] = cstr("FALSE"), | ||
96 | [NODE_NIL] = cstr("NIL"), | ||
97 | [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), | ||
98 | // Keywords. | ||
99 | [NODE_LET] = cstr("LET"), | ||
100 | [NODE_SET] = cstr("SET"), | ||
101 | [NODE_STRUCT] = cstr("STRUCT DEF"), | ||
102 | [NODE_IF] = cstr("IF"), | ||
103 | [NODE_MATCH] = cstr("MATCH"), | ||
104 | [NODE_CASE] = cstr("CASE"), | ||
105 | [NODE_COND] = cstr("COND"), | ||
106 | [NODE_ENUM] = cstr("ENUM"), | ||
107 | [NODE_BREAK] = cstr("BREAK"), | ||
108 | [NODE_CONTINUE] = cstr("CONTINUE"), | ||
109 | [NODE_WHILE] = cstr("WHILE"), | ||
110 | // Helpers. | ||
111 | [NODE_TYPE] = cstr("TYPE"), | ||
112 | [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), | ||
113 | [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), | ||
114 | [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), | ||
115 | [NODE_VAL_FIELD] = cstr("FIELD"), | ||
116 | [NODE_BLOCK] = cstr("BLOCK"), | ||
117 | }; | ||
118 | |||
119 | typedef struct Node { | ||
120 | sz id; | ||
121 | sz line; | ||
122 | sz col; | ||
123 | |||
124 | NodeKind kind; | ||
125 | union { | ||
126 | f64 d; | ||
127 | sz i; | ||
128 | u64 u; | ||
129 | Str str; | ||
130 | Str sym; | ||
131 | } value; | ||
132 | union { | ||
133 | struct { | ||
134 | struct Node *left; | ||
135 | struct Node *right; | ||
136 | }; | ||
137 | struct { | ||
138 | struct Node *next; | ||
139 | struct Node *arr_size; | ||
140 | }; | ||
141 | struct { | ||
142 | struct Node *var_name; | ||
143 | struct Node *var_type; | ||
144 | struct Node *var_val; | ||
145 | }; | ||
146 | struct { | ||
147 | struct Node *while_cond; | ||
148 | struct Node *while_expr; | ||
149 | }; | ||
150 | struct { | ||
151 | struct Node *cond_if; | ||
152 | struct Node *cond_expr; | ||
153 | struct Node *cond_else; | ||
154 | }; | ||
155 | struct { | ||
156 | struct Node *field_name; | ||
157 | struct Node *field_type; | ||
158 | struct Node *field_val; | ||
159 | }; | ||
160 | struct { | ||
161 | struct Node *match_expr; | ||
162 | struct Node **match_cases; | ||
163 | }; | ||
164 | struct { | ||
165 | struct Node *case_value; | ||
166 | struct Node *case_expr; | ||
167 | }; | ||
168 | struct Node **struct_field; | ||
169 | struct Node **elements; | ||
170 | struct Node **statements; | ||
171 | }; | ||
172 | bool is_ptr; | ||
173 | } Node; | ||
174 | |||
175 | // | ||
176 | // Parsing. | ||
177 | // | ||
178 | |||
179 | typedef struct Parser { | ||
180 | Token current; | ||
181 | Token previous; | ||
182 | Token *tokens; | ||
183 | sz idx; | ||
184 | |||
185 | // Error handling. | ||
186 | bool err; | ||
187 | bool panic; | ||
188 | Str file_name; | ||
189 | |||
190 | // Storage. | ||
191 | Node **nodes; | ||
192 | Arena *storage; | ||
193 | } Parser; | ||
194 | |||
195 | typedef enum { | ||
196 | PREC_NONE = 0, | ||
197 | PREC_LOW, // lowest precedence | ||
198 | PREC_BITLOGIC, // & | | ||
199 | PREC_BITSHIFT, // << >> | ||
200 | PREC_OR, // || | ||
201 | PREC_AND, // && | ||
202 | PREC_EQUALITY, // == != | ||
203 | PREC_COMPARISON, // < > <= >= | ||
204 | PREC_TERM, // + - | ||
205 | PREC_FACTOR, // * / % | ||
206 | PREC_UNARY, // ! - | ||
207 | PREC_CALL, // . () | ||
208 | PREC_PRIMARY // highest precedence | ||
209 | } ParsePrecedence; | ||
210 | |||
211 | typedef void (*ParseFn)(Parser *); | ||
212 | |||
213 | typedef struct { | ||
214 | ParseFn prefix; | ||
215 | ParseFn infix; | ||
216 | ParsePrecedence precedence; | ||
217 | } ParseRule; | ||
218 | |||
219 | void parse_expr(Parser *parser, ParsePrecedence precedence); | ||
220 | void parse_advance(Parser *parser); | ||
221 | bool parse_match(Parser *parser, TokenKind kind); | ||
222 | |||
223 | void parse_grouping(Parser *parser); | ||
224 | void parse_unary(Parser *parser); | ||
225 | void parse_binary(Parser *parser); | ||
226 | void parse_number(Parser *parser); | ||
227 | void parse_literal(Parser *parser); | ||
228 | void parse_string(Parser *parser); | ||
229 | void parse_symbol(Parser *parser); | ||
230 | void parse_keyword(Parser *parser); | ||
231 | void parse_type(Parser *parser); | ||
232 | void parse_block(Parser *parser); | ||
233 | |||
234 | ParseRule parse_rules[] = { | ||
235 | [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, | ||
236 | [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, | ||
237 | [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, | ||
238 | |||
239 | // Arithmetic. | ||
240 | [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, | ||
241 | [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, | ||
242 | [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, | ||
243 | [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, | ||
244 | [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, | ||
245 | |||
246 | // Logical. | ||
247 | [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, | ||
248 | [TOK_AND] = {NULL, parse_binary, PREC_AND}, | ||
249 | [TOK_OR] = {NULL, parse_binary, PREC_OR}, | ||
250 | [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
251 | [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, | ||
252 | [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
253 | [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, | ||
254 | [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
255 | [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, | ||
256 | |||
257 | // Bitwise ops. | ||
258 | [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, | ||
259 | [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
260 | [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, | ||
261 | [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
262 | [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, | ||
263 | |||
264 | // Literals. | ||
265 | [TOK_STRING] = {parse_string, NULL, PREC_NONE}, | ||
266 | [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, | ||
267 | [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, | ||
268 | [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, | ||
269 | [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, | ||
270 | [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, | ||
271 | [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, | ||
272 | [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, | ||
273 | |||
274 | // Statements. | ||
275 | [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, | ||
276 | [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, | ||
277 | [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, | ||
278 | [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, | ||
279 | [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, | ||
280 | [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, | ||
281 | [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, | ||
282 | [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, | ||
283 | [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, | ||
284 | [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, | ||
285 | [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, | ||
286 | [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, | ||
287 | |||
288 | [TOK_EOF] = {NULL, NULL, PREC_NONE}, | ||
289 | }; | ||
3 | 290 | ||
4 | Token | 291 | Node * |
5 | next_token(Parser *parser) { | 292 | node_alloc(Parser *parser, NodeKind kind, Token tok) { |
6 | return parser->tokens[parser->current_token++]; | 293 | if (parser->panic) return NULL; |
294 | static sz id = 0; | ||
295 | Node *node = arena_calloc((sz)sizeof(Node), parser->storage); | ||
296 | node->id = id++; | ||
297 | node->kind = kind; | ||
298 | node->line = tok.line; | ||
299 | node->col = tok.col; | ||
300 | return node; | ||
7 | } | 301 | } |
8 | 302 | ||
9 | Token | 303 | void |
10 | peek_token(Parser *parser) { | 304 | parse_emit_err(Parser *parser, Token token, Str msg) { |
11 | return parser->tokens[parser->current_token]; | 305 | if (parser->panic) return; |
12 | } | 306 | parser->panic = true; |
307 | parser->err = true; | ||
308 | eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, | ||
309 | msg); | ||
13 | 310 | ||
14 | bool | 311 | if (token.kind == TOK_EOF) { |
15 | has_next(Parser *parser) { | 312 | eprintln(" at end of the file"); |
16 | return parser->current_token < array_size(parser->tokens); | 313 | } else if (token.kind != TOK_UNKNOWN) { |
314 | eprintln(" at %s", token.val); | ||
315 | } | ||
17 | } | 316 | } |
18 | 317 | ||
19 | bool | 318 | void |
20 | consume_rparen(Parser *parser) { | 319 | parse_advance(Parser *parser) { |
21 | Token tok = next_token(parser); | 320 | assert(parser); |
22 | if (tok.type != TOKEN_RPAREN) { | 321 | parser->previous = parser->current; |
23 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); | 322 | if (parser->previous.kind == TOK_EOF) { |
24 | return false; | 323 | return; |
25 | } | 324 | } |
26 | return true; | 325 | parser->current = parser->tokens[parser->idx++]; |
27 | } | 326 | } |
28 | 327 | ||
29 | bool | 328 | bool |
30 | consume_lparen(Parser *parser) { | 329 | parse_match(Parser *parser, TokenKind kind) { |
31 | Token tok = next_token(parser); | 330 | assert(parser); |
32 | if (tok.type != TOKEN_LPAREN) { | 331 | if (parser->current.kind != kind) { |
33 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col); | ||
34 | return false; | 332 | return false; |
35 | } | 333 | } |
334 | parse_advance(parser); | ||
36 | return true; | 335 | return true; |
37 | } | 336 | } |
38 | 337 | ||
39 | Node * | 338 | static void |
40 | parse_number(Parser *parser) { | 339 | parse_consume(Parser *parser, TokenKind kind, Str msg) { |
41 | Token tok = next_token(parser); | 340 | if (parser->current.kind == kind) { |
42 | if (tok.type != TOKEN_NUMBER) { | 341 | parse_advance(parser); |
43 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); | 342 | return; |
44 | return NULL; | ||
45 | } | ||
46 | |||
47 | bool negative = false; | ||
48 | int base = 10; | ||
49 | char c = sv_next(&tok.value); | ||
50 | if (c == '-') { | ||
51 | negative = true; | ||
52 | c = sv_next(&tok.value); | ||
53 | } | ||
54 | if (c == '+') { | ||
55 | c = sv_next(&tok.value); | ||
56 | } | ||
57 | if (c == '0' && sv_peek(&tok.value) != '\0') { | ||
58 | c = sv_next(&tok.value); | ||
59 | if (c == 'x') { | ||
60 | base = 16; | ||
61 | c = sv_next(&tok.value); | ||
62 | } else if (c == 'b') { | ||
63 | base = 2; | ||
64 | c = sv_next(&tok.value); | ||
65 | } else if (!(c >= '0' && c <= '9')){ | ||
66 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); | ||
67 | return NULL; | ||
68 | } | ||
69 | } | ||
70 | |||
71 | // Integral part. | ||
72 | u64 integral = 0; | ||
73 | while (c != '\0') { | ||
74 | ssize_t current = 0; | ||
75 | if (c >= 'a' && c <= 'z' && base == 16) { | ||
76 | current = (c - 'a') + 10; | ||
77 | } else if (c >= 'A' && c <= 'Z' && base == 16) { | ||
78 | current = (c - 'A') + 10; | ||
79 | } else if (c >= '0' && c <= '9') { | ||
80 | current = (c - '0'); | ||
81 | } else if (c == '.') { | ||
82 | c = sv_next(&tok.value); | ||
83 | break; | ||
84 | } else { | ||
85 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); | ||
86 | return NULL; | ||
87 | } | ||
88 | integral = integral * base + current; | ||
89 | c = sv_next(&tok.value); | ||
90 | } | ||
91 | |||
92 | // Fractional part. | ||
93 | u64 fractional = 0; | ||
94 | while (c != '\0') { | ||
95 | ssize_t current = 0; | ||
96 | if (c >= 'a' && c <= 'z' && base == 16) { | ||
97 | current = (c - 'a') + 10; | ||
98 | } else if (c >= 'A' && c <= 'Z' && base == 16) { | ||
99 | current = (c - 'A') + 10; | ||
100 | } else if (c >= '0' && c <= '9') { | ||
101 | current = (c - '0'); | ||
102 | } else { | ||
103 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); | ||
104 | return NULL; | ||
105 | } | ||
106 | fractional = fractional * base + current; | ||
107 | c = sv_next(&tok.value); | ||
108 | } | 343 | } |
109 | 344 | parse_emit_err(parser, parser->current, msg); | |
110 | Node * node = alloc_node(NODE_NUMBER); | ||
111 | node->number.negative = negative; | ||
112 | node->number.integral = integral; | ||
113 | node->number.fractional = fractional; | ||
114 | node->line = tok.line; | ||
115 | node->col = tok.col; | ||
116 | return node; | ||
117 | } | 345 | } |
118 | 346 | ||
119 | Node * | 347 | void |
120 | parse_string(Parser *parser) { | 348 | parse_block(Parser *parser) { |
121 | Token tok = next_token(parser); | 349 | #if DEBUG == 1 |
122 | if (tok.type != TOKEN_STRING) { | 350 | print("parsing block "); |
123 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); | 351 | print_token(parser->previous); |
124 | return NULL; | 352 | #endif |
125 | } | 353 | Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); |
126 | Node *node = alloc_node(NODE_STRING); | 354 | if (!block) return; |
127 | node->string = tok.value; | 355 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { |
128 | node->line = tok.line; | 356 | parse_expr(parser, PREC_LOW); |
129 | node->col = tok.col; | 357 | Node *next = array_pop(parser->nodes); |
130 | return node; | 358 | array_push(block->statements, next, parser->storage); |
359 | } | ||
360 | array_push(parser->nodes, block, parser->storage); | ||
131 | } | 361 | } |
132 | 362 | ||
133 | Node * | 363 | void |
134 | parse_symbol(Parser *parser) { | 364 | parse_expr(Parser *parser, ParsePrecedence precedence) { |
135 | Token tok = next_token(parser); | 365 | parse_advance(parser); |
136 | if (tok.type != TOKEN_SYMBOL) { | 366 | // TODO: synchronize panic mode on keywords. |
137 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); | 367 | if (parser->panic) return; |
138 | return NULL; | 368 | ParseFn prefix = parse_rules[parser->previous.kind].prefix; |
369 | if (prefix == NULL) { | ||
370 | parse_emit_err(parser, parser->previous, cstr("expected expression")); | ||
371 | return; | ||
372 | } | ||
373 | prefix(parser); | ||
374 | |||
375 | while (precedence <= parse_rules[parser->current.kind].precedence) { | ||
376 | parse_advance(parser); | ||
377 | ParseFn infix = parse_rules[parser->previous.kind].infix; | ||
378 | if (infix == NULL) { | ||
379 | parse_emit_err(parser, parser->previous, | ||
380 | cstr("expected expression")); | ||
381 | return; | ||
382 | } | ||
383 | infix(parser); | ||
139 | } | 384 | } |
140 | Node *node = alloc_node(NODE_SYMBOL); | ||
141 | node->string = tok.value; | ||
142 | node->line = tok.line; | ||
143 | node->col = tok.col; | ||
144 | return node; | ||
145 | } | 385 | } |
146 | 386 | ||
147 | Node * | 387 | void |
148 | parse_type(Parser *parser) { | 388 | parse_unary(Parser *parser) { |
149 | Token tok = next_token(parser); | 389 | if (parser->panic) return; |
150 | if (tok.type != TOKEN_COLON) { | 390 | Token prev = parser->previous; |
151 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | 391 | #if DEBUG == 1 |
152 | return NULL; | 392 | print("parsing unary "); |
153 | } | 393 | print_token(prev); |
154 | Node *node = alloc_node(NODE_TYPE); | 394 | #endif |
155 | node->line = tok.line; | 395 | parse_expr(parser, PREC_UNARY); |
156 | node->col = tok.col; | 396 | Node *node = NULL; |
157 | tok = next_token(parser); | 397 | switch (prev.kind) { |
158 | if (tok.type != TOKEN_SYMBOL) { | 398 | case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; |
159 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | 399 | case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; |
160 | return NULL; | 400 | default: break; // Unreachable. |
161 | } | 401 | } |
162 | node->string = tok.value; | 402 | if (!node) return; |
163 | return node; | 403 | node->left = array_pop(parser->nodes); |
404 | array_push(parser->nodes, node, parser->storage); | ||
164 | } | 405 | } |
165 | 406 | ||
166 | Node * | 407 | void |
167 | parse_bool(Parser *parser) { | 408 | parse_literal(Parser *parser) { |
168 | Token tok = next_token(parser); | 409 | if (parser->panic) return; |
169 | if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { | 410 | Token prev = parser->previous; |
170 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); | 411 | #if DEBUG == 1 |
171 | return NULL; | 412 | print("parsing literal "); |
172 | } | 413 | print_token(prev); |
173 | Node *node = alloc_node(NODE_BOOL); | 414 | #endif |
174 | node->boolean = tok.type == TOKEN_TRUE; | 415 | Node *node = NULL; |
175 | node->line = tok.line; | 416 | switch (prev.kind) { |
176 | node->col = tok.col; | 417 | case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; |
177 | return node; | 418 | case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; |
419 | case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; | ||
420 | default: return; // Unreachable. | ||
421 | } | ||
422 | if (!node) return; | ||
423 | array_push(parser->nodes, node, parser->storage); | ||
178 | } | 424 | } |
179 | 425 | ||
180 | Node * | 426 | void |
181 | parse_builtin(Parser *parser) { | 427 | parse_type(Parser *parser) { |
182 | Token tok = next_token(parser); | 428 | Token prev = parser->previous; |
183 | Node *node = alloc_node(NODE_BUILTIN); | 429 | #if DEBUG == 1 |
184 | node->builtin.type = tok.type; | 430 | print("parsing type "); |
185 | node->line = tok.line; | 431 | print_token(prev); |
186 | node->col = tok.col; | 432 | #endif |
187 | array_init(node->builtin.args, 0); | 433 | Node *node = node_alloc(parser, NODE_TYPE, prev); |
188 | while (has_next(parser)) { | 434 | if (!node) return; |
189 | Token next = peek_token(parser); | 435 | if (parse_match(parser, TOK_AT)) { |
190 | if (next.type == TOKEN_RPAREN) { | 436 | node->is_ptr = true; |
191 | next_token(parser); | 437 | } |
192 | return node; | 438 | if (parse_match(parser, TOK_LCURLY)) { |
439 | node->kind = NODE_COMPOUND_TYPE; | ||
440 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
441 | parse_consume(parser, TOK_SYMBOL, cstr("expected name")); | ||
442 | parse_symbol(parser); | ||
443 | Node *next = array_pop(parser->nodes); | ||
444 | parse_consume(parser, TOK_COLON, cstr("incomplete type")); | ||
445 | parse_type(parser); | ||
446 | next->next = array_pop(parser->nodes); | ||
447 | array_push(node->elements, next, parser->storage); | ||
193 | } | 448 | } |
194 | Node *arg = parse_next(parser); | 449 | } else { |
195 | if (arg == NULL) { | 450 | parse_consume(parser, TOK_SYMBOL, |
196 | break; | 451 | cstr("no type given for struct field")); |
452 | node->value.sym = parser->previous.val; | ||
453 | // Optional array value? | ||
454 | if (parse_match(parser, TOK_LSQUARE)) { | ||
455 | node->kind = NODE_ARR_TYPE, | ||
456 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); | ||
457 | parse_number(parser); | ||
458 | node->arr_size = array_pop(parser->nodes); | ||
459 | parse_consume(parser, TOK_RSQUARE, | ||
460 | cstr("unmatched brackets ']' in array type")); | ||
197 | } | 461 | } |
198 | array_push(node->builtin.args, arg); | ||
199 | } | 462 | } |
200 | push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); | 463 | array_push(parser->nodes, node, parser->storage); |
201 | return NULL; | ||
202 | } | 464 | } |
203 | 465 | ||
204 | Node * | 466 | void |
205 | parse_funcall(Parser *parser) { | 467 | parse_keyword(Parser *parser) { |
206 | Token tok = peek_token(parser); | 468 | if (parser->panic) return; |
207 | Node *name = parse_symbol(parser); | 469 | Token prev = parser->previous; |
208 | if (name == NULL) { | 470 | #if DEBUG == 1 |
209 | return NULL; | 471 | print("parsing keyword "); |
210 | } | 472 | print_token(prev); |
211 | Node *node = alloc_node(NODE_FUNCALL); | 473 | #endif |
212 | node->funcall.name = name; | 474 | Node *node = NULL; |
213 | node->line = tok.line; | 475 | switch (prev.kind) { |
214 | node->col = tok.col; | 476 | case TOK_LET: { |
215 | array_init(node->funcall.args, 0); | 477 | node = node_alloc(parser, NODE_LET, prev); |
216 | while (has_next(parser)) { | 478 | if (!node) return; |
217 | Token next = peek_token(parser); | 479 | parse_consume(parser, TOK_SYMBOL, |
218 | if (next.type == TOKEN_RPAREN) { | 480 | cstr("expected symbol name on let expression")); |
219 | next_token(parser); | 481 | parse_symbol(parser); |
220 | return node; | 482 | node->var_name = array_pop(parser->nodes); |
221 | } | 483 | if (node->var_name->next) { |
222 | Node *arg = parse_next(parser); | 484 | parse_emit_err(parser, prev, |
223 | if (arg == NULL) { | 485 | cstr("invalid symbol name in let expression")); |
224 | break; | 486 | return; |
225 | } | 487 | } |
226 | array_push(node->funcall.args, arg); | 488 | |
489 | // Optional type declaration. | ||
490 | if (parse_match(parser, TOK_COLON)) { | ||
491 | parse_type(parser); | ||
492 | node->var_type = array_pop(parser->nodes); | ||
493 | } | ||
494 | |||
495 | // Optional assignment. | ||
496 | if (parse_match(parser, TOK_ASSIGN)) { | ||
497 | parse_expr(parser, PREC_LOW); | ||
498 | node->var_val = array_pop(parser->nodes); | ||
499 | } | ||
500 | } break; | ||
501 | case TOK_SET: { | ||
502 | node = node_alloc(parser, NODE_SET, prev); | ||
503 | if (!node) return; | ||
504 | parse_consume(parser, TOK_SYMBOL, | ||
505 | cstr("expected symbol name on let expression")); | ||
506 | parse_symbol(parser); | ||
507 | node->var_name = array_pop(parser->nodes); | ||
508 | parse_consume(parser, TOK_ASSIGN, | ||
509 | cstr("expected assignment on set expression")); | ||
510 | parse_expr(parser, PREC_LOW); | ||
511 | node->var_val = array_pop(parser->nodes); | ||
512 | } break; | ||
513 | case TOK_STRUCT: { | ||
514 | node = node_alloc(parser, NODE_STRUCT, prev); | ||
515 | if (!node) return; | ||
516 | parse_consume(parser, TOK_SYMBOL, | ||
517 | cstr("expected symbol name on struct definition")); | ||
518 | // Just consume this to avoid conflicts with struct literals. | ||
519 | node->value.sym = parser->previous.val; | ||
520 | |||
521 | parse_consume(parser, TOK_LCURLY, | ||
522 | cstr("expected '{' on struct definition")); | ||
523 | |||
524 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
525 | Node *field = | ||
526 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
527 | if (!field) return; | ||
528 | parse_consume( | ||
529 | parser, TOK_SYMBOL, | ||
530 | cstr("expected symbol name on struct definition")); | ||
531 | parse_symbol(parser); | ||
532 | field->field_name = array_pop(parser->nodes); | ||
533 | if (field->field_name->next || | ||
534 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
535 | parse_emit_err(parser, prev, | ||
536 | cstr("invalid symbol name in struct field")); | ||
537 | return; | ||
538 | } | ||
539 | parse_consume( | ||
540 | parser, TOK_COLON, | ||
541 | cstr("expected symbol name on struct definition")); | ||
542 | parse_type(parser); | ||
543 | field->field_type = array_pop(parser->nodes); | ||
544 | |||
545 | // Optional assignment. | ||
546 | if (parse_match(parser, TOK_ASSIGN)) { | ||
547 | parse_expr(parser, PREC_LOW); | ||
548 | field->field_val = array_pop(parser->nodes); | ||
549 | } | ||
550 | |||
551 | array_push(node->struct_field, field, parser->storage); | ||
552 | } | ||
553 | } break; | ||
554 | case TOK_IF: { | ||
555 | node = node_alloc(parser, NODE_IF, prev); | ||
556 | if (!node) return; | ||
557 | parse_consume(parser, TOK_LPAREN, | ||
558 | cstr("expected '(' on if expression")); | ||
559 | parse_expr(parser, PREC_LOW); | ||
560 | node->cond_if = array_pop(parser->nodes); | ||
561 | parse_consume(parser, TOK_RPAREN, | ||
562 | cstr("expected ')' on if expression")); | ||
563 | parse_expr(parser, PREC_LOW); | ||
564 | node->cond_expr = array_pop(parser->nodes); | ||
565 | if (parse_match(parser, TOK_ELSE)) { | ||
566 | parse_expr(parser, PREC_LOW); | ||
567 | node->cond_else = array_pop(parser->nodes); | ||
568 | } | ||
569 | } break; | ||
570 | case TOK_MATCH: { | ||
571 | node = node_alloc(parser, NODE_MATCH, prev); | ||
572 | if (!node) return; | ||
573 | parse_consume(parser, TOK_LPAREN, | ||
574 | cstr("expected '(' on if expression")); | ||
575 | parse_expr(parser, PREC_LOW); | ||
576 | node->match_expr = array_pop(parser->nodes); | ||
577 | parse_consume(parser, TOK_RPAREN, | ||
578 | cstr("expected ')' on if expression")); | ||
579 | parse_consume(parser, TOK_LCURLY, | ||
580 | cstr("expected block of match cases")); | ||
581 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
582 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
583 | if (!tmp) return; | ||
584 | // Are we on the default case. | ||
585 | if (!parse_match(parser, TOK_ELSE)) { | ||
586 | parse_consume(parser, TOK_CASE, | ||
587 | cstr("expected case statement")); | ||
588 | parse_expr(parser, PREC_LOW); | ||
589 | tmp->case_value = array_pop(parser->nodes); | ||
590 | } | ||
591 | parse_consume(parser, TOK_ASSIGN, | ||
592 | cstr("malformed case statement")); | ||
593 | parse_expr(parser, PREC_LOW); | ||
594 | tmp->case_expr = array_pop(parser->nodes); | ||
595 | array_push(node->match_cases, tmp, parser->storage); | ||
596 | } | ||
597 | // TODO: Check that we only have literals on the match case, this | ||
598 | // could be done on the analysis step, but also here... | ||
599 | // TODO: Check that there are no multiple default or duplicated | ||
600 | // cases. | ||
601 | } break; | ||
602 | case TOK_ENUM: { | ||
603 | node = node_alloc(parser, NODE_ENUM, prev); | ||
604 | if (!node) return; | ||
605 | parse_consume(parser, TOK_SYMBOL, | ||
606 | cstr("expected symbol name on enum definition")); | ||
607 | // Just consume this to avoid conflicts with struct literals. | ||
608 | node->value.sym = parser->previous.val; | ||
609 | parse_consume(parser, TOK_LCURLY, | ||
610 | cstr("expected '{' on enum definition")); | ||
611 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
612 | Node *field = | ||
613 | node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
614 | if (!field) return; | ||
615 | parse_consume(parser, TOK_SYMBOL, | ||
616 | cstr("expected symbol name on enum definition")); | ||
617 | parse_symbol(parser); | ||
618 | field->field_name = array_pop(parser->nodes); | ||
619 | if (field->field_name->next || | ||
620 | field->field_name->kind == NODE_SYMBOL_IDX) { | ||
621 | parse_emit_err(parser, prev, | ||
622 | cstr("invalid symbol name in enum field")); | ||
623 | return; | ||
624 | } | ||
625 | if (parse_match(parser, TOK_ASSIGN)) { | ||
626 | parse_expr(parser, PREC_LOW); | ||
627 | field->field_val = array_pop(parser->nodes); | ||
628 | } | ||
629 | array_push(node->struct_field, field, parser->storage); | ||
630 | } | ||
631 | } break; | ||
632 | case TOK_COND: { | ||
633 | node = node_alloc(parser, NODE_COND, prev); | ||
634 | if (!node) return; | ||
635 | parse_consume(parser, TOK_LCURLY, | ||
636 | cstr("expected '{' on cond expression")); | ||
637 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
638 | Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); | ||
639 | if (!tmp) return; | ||
640 | // Are we on the default case. | ||
641 | if (!parse_match(parser, TOK_ELSE)) { | ||
642 | parse_consume(parser, TOK_CASE, | ||
643 | cstr("expected case statement")); | ||
644 | parse_expr(parser, PREC_LOW); | ||
645 | tmp->case_value = array_pop(parser->nodes); | ||
646 | } | ||
647 | parse_consume(parser, TOK_ASSIGN, | ||
648 | cstr("malformed case statement")); | ||
649 | parse_expr(parser, PREC_LOW); | ||
650 | tmp->case_expr = array_pop(parser->nodes); | ||
651 | array_push(node->match_cases, tmp, parser->storage); | ||
652 | } | ||
653 | } break; | ||
654 | case TOK_BREAK: { | ||
655 | node = node_alloc(parser, NODE_BREAK, prev); | ||
656 | if (!node) return; | ||
657 | } break; | ||
658 | case TOK_CONTINUE: { | ||
659 | node = node_alloc(parser, NODE_CONTINUE, prev); | ||
660 | if (!node) return; | ||
661 | } break; | ||
662 | case TOK_WHILE: { | ||
663 | node = node_alloc(parser, NODE_WHILE, prev); | ||
664 | if (!node) return; | ||
665 | parse_consume(parser, TOK_LPAREN, | ||
666 | cstr("expected '(' on while expression")); | ||
667 | parse_expr(parser, PREC_LOW); | ||
668 | node->while_cond = array_pop(parser->nodes); | ||
669 | parse_consume(parser, TOK_RPAREN, | ||
670 | cstr("expected ')' on while expression")); | ||
671 | parse_expr(parser, PREC_LOW); | ||
672 | node->while_expr = array_pop(parser->nodes); | ||
673 | } break; | ||
674 | default: return; // Unreachable. | ||
227 | } | 675 | } |
228 | push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); | 676 | array_push(parser->nodes, node, parser->storage); |
229 | return NULL; | ||
230 | } | 677 | } |
231 | 678 | ||
232 | Node * | 679 | void |
233 | parse_def(Parser *parser) { | 680 | parse_binary(Parser *parser) { |
234 | Token tok = next_token(parser); | 681 | if (parser->panic) return; |
235 | 682 | Token prev = parser->previous; | |
236 | Node *symbol = parse_symbol(parser); | 683 | #if DEBUG == 1 |
237 | if (symbol == NULL) { | 684 | print("parsing binary "); |
238 | return NULL; | 685 | print_token(prev); |
239 | } | 686 | #endif |
240 | 687 | ParseRule rule = parse_rules[prev.kind]; | |
241 | // TODO: Making type definitions mandatory for now until we introduce type | 688 | parse_expr(parser, rule.precedence + 1); |
242 | // inference. | 689 | |
243 | Node *type = parse_type(parser); | 690 | Node *node; |
244 | if (type == NULL) { | 691 | switch (prev.kind) { |
245 | return NULL; | 692 | case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; |
246 | } | 693 | case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; |
247 | 694 | case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; | |
248 | Node *value = parse_next(parser); | 695 | case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; |
249 | if (value == NULL) { | 696 | case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; |
250 | return NULL; | 697 | case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; |
251 | } | 698 | case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; |
252 | 699 | case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; | |
253 | if (!consume_rparen(parser)) { | 700 | case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; |
254 | return NULL; | 701 | case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; |
702 | case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; | ||
703 | case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; | ||
704 | case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; | ||
705 | case TOK_BITAND: { | ||
706 | node = node_alloc(parser, NODE_BITAND, prev); | ||
707 | } break; | ||
708 | case TOK_BITOR: { | ||
709 | node = node_alloc(parser, NODE_BITOR, prev); | ||
710 | } break; | ||
711 | case TOK_BITLSHIFT: { | ||
712 | node = node_alloc(parser, NODE_BITLSHIFT, prev); | ||
713 | } break; | ||
714 | case TOK_BITRSHIFT: { | ||
715 | node = node_alloc(parser, NODE_BITRSHIFT, prev); | ||
716 | } break; | ||
717 | default: { | ||
718 | parse_emit_err(parser, prev, cstr("unreachable")); | ||
719 | return; | ||
720 | } | ||
255 | } | 721 | } |
256 | 722 | if (!node) return; | |
257 | Node *node = alloc_node(NODE_DEF); | 723 | node->right = array_pop(parser->nodes); |
258 | node->def.symbol = symbol; | 724 | node->left = array_pop(parser->nodes); |
259 | node->def.value = value; | 725 | array_push(parser->nodes, node, parser->storage); |
260 | node->def.type = type; | ||
261 | node->line = tok.line; | ||
262 | node->col = tok.col; | ||
263 | return node; | ||
264 | } | 726 | } |
265 | 727 | ||
266 | Node * | 728 | void |
267 | parse_set(Parser *parser) { | 729 | parse_number(Parser *parser) { |
268 | Token tok = next_token(parser); | 730 | if (parser->panic) return; |
269 | 731 | Token prev = parser->previous; | |
270 | Node *symbol = parse_symbol(parser); | 732 | #if DEBUG == 1 |
271 | if (symbol == NULL) { | 733 | print("parsing number "); |
272 | return NULL; | 734 | print_token(prev); |
273 | } | 735 | #endif |
274 | 736 | Node *node = NULL; | |
275 | Node *value = parse_next(parser); | 737 | switch (prev.kind) { |
276 | if (value == NULL) { | 738 | case TOK_NUM_INT: { |
277 | return NULL; | 739 | if (str_has_prefix(prev.val, cstr("0x")) || |
278 | } | 740 | str_has_prefix(prev.val, cstr("0b"))) { |
279 | 741 | node = node_alloc(parser, NODE_NUM_UINT, prev); | |
280 | if (!consume_rparen(parser)) { | 742 | if (!node) return; |
281 | return NULL; | 743 | node->value.u = str_to_uint(prev.val); |
744 | } else { | ||
745 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
746 | if (!node) return; | ||
747 | node->value.i = str_to_int(prev.val); | ||
748 | } | ||
749 | } break; | ||
750 | case TOK_NUM_FLOAT: { | ||
751 | node = node_alloc(parser, NODE_NUM_FLOAT, prev); | ||
752 | if (!node) return; | ||
753 | node->value.d = str_to_float(prev.val); | ||
754 | } break; | ||
755 | case TOK_CHAR: { | ||
756 | node = node_alloc(parser, NODE_NUM_INT, prev); | ||
757 | if (!node) return; | ||
758 | node->value.i = prev.val.mem[1]; | ||
759 | } break; | ||
760 | default: break; | ||
282 | } | 761 | } |
283 | 762 | array_push(parser->nodes, node, parser->storage); | |
284 | Node *node = alloc_node(NODE_SET); | ||
285 | node->set.symbol = symbol; | ||
286 | node->set.value = value; | ||
287 | node->line = tok.line; | ||
288 | node->col = tok.col; | ||
289 | return node; | ||
290 | } | 763 | } |
291 | 764 | ||
292 | Node * | 765 | void |
293 | parse_fun(Parser *parser) { | 766 | parse_string(Parser *parser) { |
294 | Token tok = next_token(parser); | 767 | if (parser->panic) return; |
295 | 768 | Token prev = parser->previous; | |
296 | Node *name = parse_symbol(parser); | 769 | #if DEBUG == 1 |
297 | if (name == NULL) { | 770 | print("parsing string "); |
298 | return NULL; | 771 | print_token(prev); |
299 | } | 772 | #endif |
300 | 773 | Node *node = node_alloc(parser, NODE_STRING, prev); | |
301 | Node *node = alloc_node(NODE_FUN); | 774 | if (!node) return; |
302 | node->fun.name = name; | 775 | node->value.str = str_remove_prefix(prev.val, cstr("\"")); |
303 | array_init(node->fun.param_names, 0); | 776 | node->value.str = str_remove_suffix(node->value.str, cstr("\"")); |
304 | array_init(node->fun.param_types, 0); | 777 | array_push(parser->nodes, node, parser->storage); |
305 | node->line = tok.line; | 778 | } |
306 | node->col = tok.col; | ||
307 | 779 | ||
308 | // Parse parameter list and return type. | 780 | void |
309 | if (!consume_lparen(parser)) { | 781 | parse_symbol(Parser *parser) { |
310 | return NULL; | 782 | if (parser->panic) return; |
311 | } | 783 | Token prev = parser->previous; |
312 | while (true) { | 784 | #if DEBUG == 1 |
313 | Token next = peek_token(parser); | 785 | print("parsing symbol "); |
314 | if (next.type == TOKEN_RPAREN) { | 786 | print_token(prev); |
315 | next_token(parser); | 787 | #endif |
316 | break; | 788 | if (prev.kind == TOK_AT) { |
789 | parse_consume(parser, TOK_SYMBOL, | ||
790 | cstr("expected symbol after '.' operator")); | ||
791 | parse_symbol(parser); | ||
792 | Node *node = array_pop(parser->nodes); | ||
793 | if (node) { | ||
794 | node->is_ptr = true; | ||
795 | array_push(parser->nodes, node, parser->storage); | ||
317 | } | 796 | } |
318 | Node *name = parse_symbol(parser); | 797 | return; |
319 | if (name == NULL) { | 798 | } |
320 | return NULL; | 799 | Node *node = node_alloc(parser, NODE_SYMBOL, prev); |
800 | if (!node) return; | ||
801 | if (parse_match(parser, TOK_DOT)) { | ||
802 | // Symbol chain. | ||
803 | parse_consume(parser, TOK_SYMBOL, | ||
804 | cstr("expected symbol after '.' operator")); | ||
805 | parse_symbol(parser); | ||
806 | node->next = array_pop(parser->nodes); | ||
807 | } else if (parse_match(parser, TOK_LCURLY)) { | ||
808 | // Struct literal. | ||
809 | node->kind = NODE_STRUCT_LIT; | ||
810 | while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { | ||
811 | Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); | ||
812 | parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); | ||
813 | parse_symbol(parser); | ||
814 | field->field_name = array_pop(parser->nodes); | ||
815 | parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); | ||
816 | parse_expr(parser, PREC_LOW); | ||
817 | field->field_type = array_pop(parser->nodes); | ||
818 | array_push(node->elements, field, parser->storage); | ||
321 | } | 819 | } |
322 | Node *type = parse_type(parser); | 820 | } else if (parse_match(parser, TOK_LSQUARE)) { |
323 | if (type == NULL) { | 821 | node->kind = NODE_SYMBOL_IDX; |
324 | return NULL; | 822 | parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); |
823 | parse_number(parser); | ||
824 | node->arr_size = array_pop(parser->nodes); | ||
825 | parse_consume(parser, TOK_RSQUARE, | ||
826 | cstr("unmatched brackets ']' in array type")); | ||
827 | if (parse_match(parser, TOK_DOT)) { | ||
828 | // Symbol chain. | ||
829 | parse_consume(parser, TOK_SYMBOL, | ||
830 | cstr("expected symbol after '.' operator")); | ||
831 | parse_symbol(parser); | ||
832 | node->next = array_pop(parser->nodes); | ||
325 | } | 833 | } |
326 | array_push(node->fun.param_names, name); | 834 | } else { |
327 | array_push(node->fun.param_types, type); | 835 | node = node_alloc(parser, NODE_SYMBOL, prev); |
328 | } | 836 | if (!node) return; |
329 | Node *ret_type = parse_type(parser); | ||
330 | if (ret_type == NULL) { | ||
331 | return NULL; | ||
332 | } | ||
333 | node->fun.return_type = ret_type; | ||
334 | |||
335 | Node *body = parse_next(parser); | ||
336 | if (body == NULL) { | ||
337 | return NULL; | ||
338 | } | ||
339 | node->fun.body = body; | ||
340 | if (!consume_rparen(parser)) { | ||
341 | return NULL; | ||
342 | } | 837 | } |
343 | 838 | node->value.sym = prev.val; | |
344 | return node; | 839 | array_push(parser->nodes, node, parser->storage); |
345 | } | 840 | } |
346 | 841 | ||
347 | Node * | 842 | void |
348 | parse_if(Parser *parser) { | 843 | parse_grouping(Parser *parser) { |
349 | Token tok = next_token(parser); | 844 | if (parser->panic) return; |
350 | 845 | #if DEBUG == 1 | |
351 | Node *node = alloc_node(NODE_IF); | 846 | print("parsing group "); |
352 | node->ifexpr.cond = NULL; | 847 | print_token(parser->previous); |
353 | node->ifexpr.expr_true = NULL; | 848 | #endif |
354 | node->ifexpr.expr_false = NULL; | 849 | parse_expr(parser, PREC_LOW); |
355 | node->line = tok.line; | 850 | parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); |
356 | node->col = tok.col; | ||
357 | |||
358 | Node *cond = parse_next(parser); | ||
359 | if (cond == NULL) { | ||
360 | return NULL; | ||
361 | } | ||
362 | Node *expr_true = parse_next(parser); | ||
363 | if (expr_true == NULL) { | ||
364 | return NULL; | ||
365 | } | ||
366 | node->ifexpr.cond = cond; | ||
367 | node->ifexpr.expr_true = expr_true; | ||
368 | tok = peek_token(parser); | ||
369 | |||
370 | // Optional else statement. | ||
371 | if (tok.type != TOKEN_RPAREN) { | ||
372 | Node *expr_false = parse_next(parser); | ||
373 | if (expr_false == NULL) { | ||
374 | return NULL; | ||
375 | } | ||
376 | node->ifexpr.expr_false = expr_false; | ||
377 | } | ||
378 | |||
379 | if (!consume_rparen(parser)) { | ||
380 | return NULL; | ||
381 | } | ||
382 | |||
383 | return node; | ||
384 | } | 851 | } |
385 | 852 | ||
386 | Node * | 853 | void |
387 | parse_paren(Parser *parser) { | 854 | graph_node(Node *node) { |
388 | next_token(parser); // Skip paren. | 855 | if (node == NULL) return; |
389 | 856 | print("%d [width=2.5,shape=Mrecord,label=\"", node->id); | |
390 | Token tok = peek_token(parser); | 857 | print("{ <type> %s | { L: %d | C: %d } } ", node_str[node->kind], |
391 | 858 | node->line, node->col); | |
392 | switch (tok.type) { | 859 | switch (node->kind) { |
393 | // Builtin functions. | 860 | case NODE_NUM_INT: print("| Value: %d", node->value.i); break; |
394 | case TOKEN_ADD: | 861 | case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; |
395 | case TOKEN_SUB: | 862 | case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; |
396 | case TOKEN_MUL: | 863 | case NODE_STRING: print("| Value: %s", node->value.str); break; |
397 | case TOKEN_DIV: | 864 | case NODE_SYMBOL_IDX: |
398 | case TOKEN_MOD: | 865 | case NODE_STRUCT: |
399 | case TOKEN_NOT: | 866 | case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; |
400 | case TOKEN_AND: | 867 | case NODE_SYMBOL: |
401 | case TOKEN_OR: | 868 | case NODE_TYPE: { |
402 | case TOKEN_EQ: | 869 | if (node->is_ptr) { |
403 | case TOKEN_LT: | 870 | print("| Name: @%s", node->value.sym); |
404 | case TOKEN_GT: | 871 | } else { |
405 | case TOKEN_LE: | 872 | print("| Name: %s", node->value.sym); |
406 | case TOKEN_GE: { return parse_builtin(parser); } break; | 873 | } |
407 | // Special functions. | 874 | } break; |
408 | case TOKEN_DEF: { return parse_def(parser); } break; | ||
409 | case TOKEN_SET: { return parse_set(parser); } break; | ||
410 | case TOKEN_FUN: { return parse_fun(parser); } break; | ||
411 | case TOKEN_IF: { return parse_if(parser); } break; | ||
412 | default: break; | 875 | default: break; |
413 | } | 876 | } |
414 | 877 | println("\"];"); | |
415 | return parse_funcall(parser); | 878 | |
416 | } | 879 | switch (node->kind) { |
417 | 880 | case NODE_COND: | |
418 | Node * | 881 | case NODE_MATCH: { |
419 | parse_block(Parser *parser) { | 882 | if (node->match_expr) { |
420 | Token tok = next_token(parser); | 883 | println("%d:e->%d:w;", node->id, node->match_expr->id); |
421 | 884 | graph_node(node->match_expr); | |
422 | Node *node = alloc_node(NODE_BLOCK); | 885 | } |
423 | array_init(node->block.expr, 0); | 886 | for (sz i = 0; i < array_size(node->match_cases); i++) { |
424 | node->line = tok.line; | 887 | Node *next = node->match_cases[i]; |
425 | node->col = tok.col; | 888 | println("%d:e->%d:w;", node->id, next->id); |
426 | while (true) { | 889 | graph_node(next); |
427 | Token next = peek_token(parser); | 890 | } |
428 | if (next.type == TOKEN_RCURLY) { | 891 | } break; |
429 | next_token(parser); | 892 | case NODE_BLOCK: |
430 | break; | 893 | case NODE_STRUCT_LIT: |
431 | } | 894 | case NODE_COMPOUND_TYPE: { |
432 | Node *expr = parse_next(parser); | 895 | for (sz i = 0; i < array_size(node->elements); i++) { |
433 | if (expr == NULL) { | 896 | Node *next = node->elements[i]; |
434 | return NULL; | 897 | println("%d:e->%d:w;", node->id, next->id); |
435 | } | 898 | graph_node(next); |
436 | array_push(node->block.expr, expr); | 899 | } |
437 | } | 900 | } break; |
438 | return node; | 901 | case NODE_ENUM: |
439 | } | 902 | case NODE_STRUCT: { |
440 | 903 | for (sz i = 0; i < array_size(node->struct_field); i++) { | |
441 | Node * | 904 | Node *next = node->struct_field[i]; |
442 | parse_next(Parser *parser) { | 905 | println("%d:e->%d:w;", node->id, next->id); |
443 | Token tok = peek_token(parser); | 906 | graph_node(next); |
444 | switch (tok.type) { | 907 | } |
445 | case TOKEN_NUMBER: { return parse_number(parser); } break; | 908 | } break; |
446 | case TOKEN_STRING: { return parse_string(parser); } break; | 909 | case NODE_IF: { |
447 | case TOKEN_SYMBOL: { return parse_symbol(parser); } break; | 910 | if (node->cond_if) { |
448 | case TOKEN_TRUE: | 911 | println("%d:e->%d:w;", node->id, node->cond_if->id); |
449 | case TOKEN_FALSE: { return parse_bool(parser); } break; | 912 | graph_node(node->cond_if); |
450 | case TOKEN_LPAREN: { return parse_paren(parser); } break; | 913 | } |
451 | case TOKEN_EOF: { return NULL; } break; | 914 | if (node->cond_expr) { |
452 | case TOKEN_LCURLY: { return parse_block(parser); } break; | 915 | println("%d:e->%d:w;", node->id, node->cond_expr->id); |
916 | graph_node(node->cond_expr); | ||
917 | } | ||
918 | if (node->cond_else) { | ||
919 | println("%d:e->%d:w;", node->id, node->cond_else->id); | ||
920 | graph_node(node->cond_else); | ||
921 | } | ||
922 | } break; | ||
923 | case NODE_VAL_FIELD: | ||
924 | case NODE_SET: | ||
925 | case NODE_LET: { | ||
926 | if (node->var_name) { | ||
927 | println("%d:e->%d:w;", node->id, node->var_name->id); | ||
928 | graph_node(node->var_name); | ||
929 | } | ||
930 | if (node->var_type) { | ||
931 | println("%d:e->%d:w;", node->id, node->var_type->id); | ||
932 | graph_node(node->var_type); | ||
933 | } | ||
934 | if (node->var_val) { | ||
935 | println("%d:e->%d:w;", node->id, node->var_val->id); | ||
936 | graph_node(node->var_val); | ||
937 | } | ||
938 | } break; | ||
453 | default: { | 939 | default: { |
454 | push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); | 940 | if (node->left) { |
455 | return NULL; | 941 | println("%d:e->%d:w;", node->id, node->left->id); |
942 | graph_node(node->left); | ||
943 | } | ||
944 | if (node->right) { | ||
945 | println("%d:e->%d:w;", node->id, node->right->id); | ||
946 | graph_node(node->right); | ||
947 | } | ||
456 | } break; | 948 | } break; |
457 | } | 949 | } |
458 | } | 950 | } |
459 | 951 | ||
460 | bool | 952 | void |
461 | parse_roots(Parser *parser) { | 953 | graph_ast(Node **roots) { |
462 | while (has_next(parser)) { | 954 | if (roots == NULL) return; |
463 | Token tok = peek_token(parser); | 955 | println("digraph ast {"); |
464 | if (tok.type == TOKEN_EOF) { | 956 | println("rankdir=LR;"); |
465 | break; | 957 | println("ranksep=\"0.95 equally\";"); |
466 | } | 958 | println("nodesep=\"0.5 equally\";"); |
467 | Node *node = parse_next(parser); | 959 | println("overlap=scale;"); |
468 | if (node == NULL) { | 960 | println("bgcolor=\"transparent\";"); |
469 | return false; | 961 | for (sz i = 0; i < array_size(roots); ++i) { |
470 | } | 962 | println("subgraph %d {", i); |
471 | array_push(parser->roots, node); | 963 | Node *root = roots[array_size(roots) - 1 - i]; |
472 | } | 964 | graph_node(root); |
473 | return true; | 965 | println("}"); |
966 | } | ||
967 | println("}"); | ||
474 | } | 968 | } |
475 | 969 | ||
476 | Root * | 970 | #endif // PARSER_C |
477 | parse(Token *tokens) { | ||
478 | Parser parser = { | ||
479 | .tokens = tokens, | ||
480 | .current_token = 0, | ||
481 | }; | ||
482 | array_init(parser.roots, 0); | ||
483 | |||
484 | if (!parse_roots(&parser)) { | ||
485 | return NULL; | ||
486 | } | ||
487 | return parser.roots; | ||
488 | } | ||
diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index 206ca4c..0000000 --- a/src/parser.h +++ /dev/null | |||
@@ -1,18 +0,0 @@ | |||
1 | #ifndef BDL_PARSER_H | ||
2 | #define BDL_PARSER_H | ||
3 | |||
4 | #include "lexer.h" | ||
5 | #include "nodes.h" | ||
6 | |||
7 | typedef Node* Root; | ||
8 | |||
9 | typedef struct Parser { | ||
10 | Token *tokens; | ||
11 | size_t current_token; | ||
12 | Root *roots; | ||
13 | } Parser; | ||
14 | |||
15 | Root * parse(Token *tokens); | ||
16 | Node * parse_next(Parser *parser); | ||
17 | |||
18 | #endif // BDL_PARSER_H | ||