From 27e6bf016e87702d30ebb7c0716ff3e4f08a8823 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Jun 2024 08:51:35 +0200 Subject: Move parser to its own file --- src/main.c | 978 +----------------------------------------- src/parser.c | 1340 +++++++++++++++++++++++++++++++++++++++------------------- src/parser.h | 18 - 3 files changed, 912 insertions(+), 1424 deletions(-) delete mode 100644 src/parser.h diff --git a/src/main.c b/src/main.c index d001d9c..66142bd 100644 --- a/src/main.c +++ b/src/main.c @@ -4,6 +4,7 @@ #include "badlib.h" #include "lexer.c" +#include "parser.c" typedef enum ExecMode { RUN_NORMAL, @@ -20,983 +21,6 @@ init(void) { log_init_default(); } -void -print_token(Token tok) { - println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); -} - -void -print_tokens(Str path, Token *tokens) { - for (sz i = 0; i < array_size(tokens); i++) { - Token tok = tokens[i]; - print("%s:", path); - print_token(tok); - } -} - -// -// Nodes -// - -typedef enum NodeKind { - // Arithmetic. - NODE_ADD, - NODE_SUB, - NODE_DIV, - NODE_MUL, - NODE_MOD, - // Logical. - NODE_NOT, - NODE_AND, - NODE_OR, - NODE_EQ, - NODE_NEQ, - NODE_LT, - NODE_GT, - NODE_LE, - NODE_GE, - // Bitwise ops. - NODE_BITNOT, - NODE_BITAND, - NODE_BITOR, - NODE_BITLSHIFT, - NODE_BITRSHIFT, - // Literals. - NODE_STRING, - NODE_SYMBOL, - NODE_NUM_INT, - NODE_NUM_UINT, - NODE_NUM_FLOAT, - NODE_TRUE, - NODE_FALSE, - NODE_NIL, - NODE_STRUCT_LIT, - // Keywords. - NODE_LET, - NODE_SET, - NODE_STRUCT, - NODE_IF, - NODE_MATCH, - NODE_CASE, - NODE_COND, - NODE_ENUM, - NODE_BREAK, - NODE_CONTINUE, - NODE_WHILE, - // Helpers. - NODE_SYMBOL_IDX, - NODE_TYPE, - NODE_ARR_TYPE, - NODE_COMPOUND_TYPE, - NODE_VAL_FIELD, - NODE_BLOCK, -} NodeKind; - -Str node_str[] = { - // Arithmetic. - [NODE_ADD] = cstr("ADD"), - [NODE_SUB] = cstr("SUB"), - [NODE_DIV] = cstr("DIV"), - [NODE_MUL] = cstr("MUL"), - [NODE_MOD] = cstr("MOD"), - // Logical. - [NODE_NOT] = cstr("NOT"), - [NODE_AND] = cstr("AND"), - [NODE_OR] = cstr("OR"), - [NODE_EQ] = cstr("EQ"), - [NODE_NEQ] = cstr("NEQ"), - [NODE_LT] = cstr("LT"), - [NODE_GT] = cstr("GT"), - [NODE_LE] = cstr("LE"), - [NODE_GE] = cstr("GE"), - // Bitwise ops. - [NODE_BITNOT] = cstr("BITNOT"), - [NODE_BITAND] = cstr("BITAND"), - [NODE_BITOR] = cstr("BITOR"), - [NODE_BITLSHIFT] = cstr("BITLSHIFT"), - [NODE_BITRSHIFT] = cstr("BITRSHIFT"), - // Literals. - [NODE_STRING] = cstr("STRING"), - [NODE_SYMBOL] = cstr("SYMBOL"), - [NODE_NUM_INT] = cstr("INT"), - [NODE_NUM_UINT] = cstr("UINT"), - [NODE_NUM_FLOAT] = cstr("FLOAT"), - [NODE_TRUE] = cstr("TRUE"), - [NODE_FALSE] = cstr("FALSE"), - [NODE_NIL] = cstr("NIL"), - [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), - // Keywords. - [NODE_LET] = cstr("LET"), - [NODE_SET] = cstr("SET"), - [NODE_STRUCT] = cstr("STRUCT DEF"), - [NODE_IF] = cstr("IF"), - [NODE_MATCH] = cstr("MATCH"), - [NODE_CASE] = cstr("CASE"), - [NODE_COND] = cstr("COND"), - [NODE_ENUM] = cstr("ENUM"), - [NODE_BREAK] = cstr("BREAK"), - [NODE_CONTINUE] = cstr("CONTINUE"), - [NODE_WHILE] = cstr("WHILE"), - // Helpers. - [NODE_TYPE] = cstr("TYPE"), - [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), - [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), - [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), - [NODE_VAL_FIELD] = cstr("FIELD"), - [NODE_BLOCK] = cstr("BLOCK"), -}; - -typedef struct Node { - sz id; - sz line; - sz col; - - NodeKind kind; - union { - f64 d; - sz i; - u64 u; - Str str; - Str sym; - } value; - union { - struct { - struct Node *left; - struct Node *right; - }; - struct { - struct Node *next; - struct Node *arr_size; - }; - struct { - struct Node *var_name; - struct Node *var_type; - struct Node *var_val; - }; - struct { - struct Node *while_cond; - struct Node *while_expr; - }; - struct { - struct Node *cond_if; - struct Node *cond_expr; - struct Node *cond_else; - }; - struct { - struct Node *field_name; - struct Node *field_type; - struct Node *field_val; - }; - struct { - struct Node *match_expr; - struct Node **match_cases; - }; - struct { - struct Node *case_value; - struct Node *case_expr; - }; - struct Node **struct_field; - struct Node **elements; - struct Node **statements; - }; - bool is_ptr; -} Node; - -// -// Parsing. -// - -typedef struct Parser { - Token current; - Token previous; - Token *tokens; - sz idx; - - // Error handling. - bool err; - bool panic; - Str file_name; - - // Storage. - Node **nodes; - Arena *storage; -} Parser; - -typedef enum { - PREC_NONE = 0, - PREC_LOW, // lowest precedence - PREC_BITLOGIC, // & | - PREC_BITSHIFT, // << >> - PREC_OR, // || - PREC_AND, // && - PREC_EQUALITY, // == != - PREC_COMPARISON, // < > <= >= - PREC_TERM, // + - - PREC_FACTOR, // * / % - PREC_UNARY, // ! - - PREC_CALL, // . () - PREC_PRIMARY // highest precedence -} ParsePrecedence; - -typedef void (*ParseFn)(Parser *); - -typedef struct { - ParseFn prefix; - ParseFn infix; - ParsePrecedence precedence; -} ParseRule; - -void parse_expr(Parser *parser, ParsePrecedence precedence); -void parse_advance(Parser *parser); -bool parse_match(Parser *parser, TokenKind kind); - -void parse_grouping(Parser *parser); -void parse_unary(Parser *parser); -void parse_binary(Parser *parser); -void parse_number(Parser *parser); -void parse_literal(Parser *parser); -void parse_string(Parser *parser); -void parse_symbol(Parser *parser); -void parse_keyword(Parser *parser); -void parse_type(Parser *parser); -void parse_block(Parser *parser); - -ParseRule parse_rules[] = { - [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, - [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, - [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, - - // Arithmetic. - [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, - [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, - [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, - [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, - [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, - - // Logical. - [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, - [TOK_AND] = {NULL, parse_binary, PREC_AND}, - [TOK_OR] = {NULL, parse_binary, PREC_OR}, - [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, - [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, - [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, - - // Bitwise ops. - [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, - [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, - [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, - [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, - [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, - - // Literals. - [TOK_STRING] = {parse_string, NULL, PREC_NONE}, - [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, - [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, - [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, - [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, - [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, - [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, - [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, - - // Statements. - [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, - [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, - [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, - [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, - [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, - [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, - [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, - [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, - [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, - [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, - [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, - [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, - - [TOK_EOF] = {NULL, NULL, PREC_NONE}, -}; - -Node * -node_alloc(Parser *parser, NodeKind kind, Token tok) { - if (parser->panic) return NULL; - static sz id = 0; - Node *node = arena_calloc((sz)sizeof(Node), parser->storage); - node->id = id++; - node->kind = kind; - node->line = tok.line; - node->col = tok.col; - return node; -} - -void -parse_emit_err(Parser *parser, Token token, Str msg) { - if (parser->panic) return; - parser->panic = true; - parser->err = true; - eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, - msg); - - if (token.kind == TOK_EOF) { - eprintln(" at end of the file"); - } else if (token.kind != TOK_UNKNOWN) { - eprintln(" at %s", token.val); - } -} - -void -parse_advance(Parser *parser) { - assert(parser); - parser->previous = parser->current; - if (parser->previous.kind == TOK_EOF) { - return; - } - parser->current = parser->tokens[parser->idx++]; -} - -bool -parse_match(Parser *parser, TokenKind kind) { - assert(parser); - if (parser->current.kind != kind) { - return false; - } - parse_advance(parser); - return true; -} - -static void -parse_consume(Parser *parser, TokenKind kind, Str msg) { - if (parser->current.kind == kind) { - parse_advance(parser); - return; - } - parse_emit_err(parser, parser->current, msg); -} - -void -parse_block(Parser *parser) { -#if DEBUG == 1 - print("parsing block "); - print_token(parser->previous); -#endif - Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); - if (!block) return; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - parse_expr(parser, PREC_LOW); - Node *next = array_pop(parser->nodes); - array_push(block->statements, next, parser->storage); - } - array_push(parser->nodes, block, parser->storage); -} - -void -parse_expr(Parser *parser, ParsePrecedence precedence) { - parse_advance(parser); - // TODO: synchronize panic mode on keywords. - if (parser->panic) return; - ParseFn prefix = parse_rules[parser->previous.kind].prefix; - if (prefix == NULL) { - parse_emit_err(parser, parser->previous, cstr("expected expression")); - return; - } - prefix(parser); - - while (precedence <= parse_rules[parser->current.kind].precedence) { - parse_advance(parser); - ParseFn infix = parse_rules[parser->previous.kind].infix; - if (infix == NULL) { - parse_emit_err(parser, parser->previous, - cstr("expected expression")); - return; - } - infix(parser); - } -} - -void -parse_unary(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing unary "); - print_token(prev); -#endif - parse_expr(parser, PREC_UNARY); - Node *node = NULL; - switch (prev.kind) { - case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; - case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; - default: break; // Unreachable. - } - if (!node) return; - node->left = array_pop(parser->nodes); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_literal(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing literal "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; - case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; - case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; - default: return; // Unreachable. - } - if (!node) return; - array_push(parser->nodes, node, parser->storage); -} - -void -parse_type(Parser *parser) { - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing type "); - print_token(prev); -#endif - Node *node = node_alloc(parser, NODE_TYPE, prev); - if (!node) return; - if (parse_match(parser, TOK_AT)) { - node->is_ptr = true; - } - if (parse_match(parser, TOK_LCURLY)) { - node->kind = NODE_COMPOUND_TYPE; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - parse_consume(parser, TOK_SYMBOL, cstr("expected name")); - parse_symbol(parser); - Node *next = array_pop(parser->nodes); - parse_consume(parser, TOK_COLON, cstr("incomplete type")); - parse_type(parser); - next->next = array_pop(parser->nodes); - array_push(node->elements, next, parser->storage); - } - } else { - parse_consume(parser, TOK_SYMBOL, - cstr("no type given for struct field")); - node->value.sym = parser->previous.val; - // Optional array value? - if (parse_match(parser, TOK_LSQUARE)) { - node->kind = NODE_ARR_TYPE, - parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); - parse_number(parser); - node->arr_size = array_pop(parser->nodes); - parse_consume(parser, TOK_RSQUARE, - cstr("unmatched brackets ']' in array type")); - } - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_keyword(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing keyword "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_LET: { - node = node_alloc(parser, NODE_LET, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on let expression")); - parse_symbol(parser); - node->var_name = array_pop(parser->nodes); - if (node->var_name->next) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in let expression")); - return; - } - - // Optional type declaration. - if (parse_match(parser, TOK_COLON)) { - parse_type(parser); - node->var_type = array_pop(parser->nodes); - } - - // Optional assignment. - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - node->var_val = array_pop(parser->nodes); - } - } break; - case TOK_SET: { - node = node_alloc(parser, NODE_SET, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on let expression")); - parse_symbol(parser); - node->var_name = array_pop(parser->nodes); - parse_consume(parser, TOK_ASSIGN, - cstr("expected assignment on set expression")); - parse_expr(parser, PREC_LOW); - node->var_val = array_pop(parser->nodes); - } break; - case TOK_STRUCT: { - node = node_alloc(parser, NODE_STRUCT, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on struct definition")); - // Just consume this to avoid conflicts with struct literals. - node->value.sym = parser->previous.val; - - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on struct definition")); - - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = - node_alloc(parser, NODE_VAL_FIELD, parser->current); - if (!field) return; - parse_consume( - parser, TOK_SYMBOL, - cstr("expected symbol name on struct definition")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - if (field->field_name->next || - field->field_name->kind == NODE_SYMBOL_IDX) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in struct field")); - return; - } - parse_consume( - parser, TOK_COLON, - cstr("expected symbol name on struct definition")); - parse_type(parser); - field->field_type = array_pop(parser->nodes); - - // Optional assignment. - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - field->field_val = array_pop(parser->nodes); - } - - array_push(node->struct_field, field, parser->storage); - } - } break; - case TOK_IF: { - node = node_alloc(parser, NODE_IF, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on if expression")); - parse_expr(parser, PREC_LOW); - node->cond_if = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on if expression")); - parse_expr(parser, PREC_LOW); - node->cond_expr = array_pop(parser->nodes); - if (parse_match(parser, TOK_ELSE)) { - parse_expr(parser, PREC_LOW); - node->cond_else = array_pop(parser->nodes); - } - } break; - case TOK_MATCH: { - node = node_alloc(parser, NODE_MATCH, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on if expression")); - parse_expr(parser, PREC_LOW); - node->match_expr = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on if expression")); - parse_consume(parser, TOK_LCURLY, - cstr("expected block of match cases")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); - if (!tmp) return; - // Are we on the default case. - if (!parse_match(parser, TOK_ELSE)) { - parse_consume(parser, TOK_CASE, - cstr("expected case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_value = array_pop(parser->nodes); - } - parse_consume(parser, TOK_ASSIGN, - cstr("malformed case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_expr = array_pop(parser->nodes); - array_push(node->match_cases, tmp, parser->storage); - } - // TODO: Check that we only have literals on the match case, this - // could be done on the analysis step, but also here... - // TODO: Check that there are no multiple default or duplicated - // cases. - } break; - case TOK_ENUM: { - node = node_alloc(parser, NODE_ENUM, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on enum definition")); - // Just consume this to avoid conflicts with struct literals. - node->value.sym = parser->previous.val; - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on enum definition")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = - node_alloc(parser, NODE_VAL_FIELD, parser->current); - if (!field) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on enum definition")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - if (field->field_name->next || - field->field_name->kind == NODE_SYMBOL_IDX) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in enum field")); - return; - } - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - field->field_val = array_pop(parser->nodes); - } - array_push(node->struct_field, field, parser->storage); - } - } break; - case TOK_COND: { - node = node_alloc(parser, NODE_COND, prev); - if (!node) return; - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on cond expression")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); - if (!tmp) return; - // Are we on the default case. - if (!parse_match(parser, TOK_ELSE)) { - parse_consume(parser, TOK_CASE, - cstr("expected case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_value = array_pop(parser->nodes); - } - parse_consume(parser, TOK_ASSIGN, - cstr("malformed case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_expr = array_pop(parser->nodes); - array_push(node->match_cases, tmp, parser->storage); - } - } break; - case TOK_BREAK: { - node = node_alloc(parser, NODE_BREAK, prev); - if (!node) return; - } break; - case TOK_CONTINUE: { - node = node_alloc(parser, NODE_CONTINUE, prev); - if (!node) return; - } break; - case TOK_WHILE: { - node = node_alloc(parser, NODE_WHILE, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on while expression")); - parse_expr(parser, PREC_LOW); - node->while_cond = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on while expression")); - parse_expr(parser, PREC_LOW); - node->while_expr = array_pop(parser->nodes); - } break; - default: return; // Unreachable. - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_binary(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing binary "); - print_token(prev); -#endif - ParseRule rule = parse_rules[prev.kind]; - parse_expr(parser, rule.precedence + 1); - - Node *node; - switch (prev.kind) { - case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; - case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; - case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; - case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; - case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; - case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; - case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; - case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; - case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; - case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; - case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; - case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; - case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; - case TOK_BITAND: { - node = node_alloc(parser, NODE_BITAND, prev); - } break; - case TOK_BITOR: { - node = node_alloc(parser, NODE_BITOR, prev); - } break; - case TOK_BITLSHIFT: { - node = node_alloc(parser, NODE_BITLSHIFT, prev); - } break; - case TOK_BITRSHIFT: { - node = node_alloc(parser, NODE_BITRSHIFT, prev); - } break; - default: { - parse_emit_err(parser, prev, cstr("unreachable")); - return; - } - } - if (!node) return; - node->right = array_pop(parser->nodes); - node->left = array_pop(parser->nodes); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_number(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing number "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_NUM_INT: { - if (str_has_prefix(prev.val, cstr("0x")) || - str_has_prefix(prev.val, cstr("0b"))) { - node = node_alloc(parser, NODE_NUM_UINT, prev); - if (!node) return; - node->value.u = str_to_uint(prev.val); - } else { - node = node_alloc(parser, NODE_NUM_INT, prev); - if (!node) return; - node->value.i = str_to_int(prev.val); - } - } break; - case TOK_NUM_FLOAT: { - node = node_alloc(parser, NODE_NUM_FLOAT, prev); - if (!node) return; - node->value.d = str_to_float(prev.val); - } break; - case TOK_CHAR: { - node = node_alloc(parser, NODE_NUM_INT, prev); - if (!node) return; - node->value.i = prev.val.mem[1]; - } break; - default: break; - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_string(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing string "); - print_token(prev); -#endif - Node *node = node_alloc(parser, NODE_STRING, prev); - if (!node) return; - node->value.str = str_remove_prefix(prev.val, cstr("\"")); - node->value.str = str_remove_suffix(node->value.str, cstr("\"")); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_symbol(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing symbol "); - print_token(prev); -#endif - if (prev.kind == TOK_AT) { - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - Node *node = array_pop(parser->nodes); - if (node) { - node->is_ptr = true; - array_push(parser->nodes, node, parser->storage); - } - return; - } - Node *node = node_alloc(parser, NODE_SYMBOL, prev); - if (!node) return; - if (parse_match(parser, TOK_DOT)) { - // Symbol chain. - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - node->next = array_pop(parser->nodes); - } else if (parse_match(parser, TOK_LCURLY)) { - // Struct literal. - node->kind = NODE_STRUCT_LIT; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); - parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); - parse_expr(parser, PREC_LOW); - field->field_type = array_pop(parser->nodes); - array_push(node->elements, field, parser->storage); - } - } else if (parse_match(parser, TOK_LSQUARE)) { - node->kind = NODE_SYMBOL_IDX; - parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); - parse_number(parser); - node->arr_size = array_pop(parser->nodes); - parse_consume(parser, TOK_RSQUARE, - cstr("unmatched brackets ']' in array type")); - if (parse_match(parser, TOK_DOT)) { - // Symbol chain. - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - node->next = array_pop(parser->nodes); - } - } else { - node = node_alloc(parser, NODE_SYMBOL, prev); - if (!node) return; - } - node->value.sym = prev.val; - array_push(parser->nodes, node, parser->storage); -} - -void -parse_grouping(Parser *parser) { - if (parser->panic) return; -#if DEBUG == 1 - print("parsing group "); - print_token(parser->previous); -#endif - parse_expr(parser, PREC_LOW); - parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); -} - -void -graph_node(Node *node) { - if (node == NULL) return; - print("%d [width=2.5,shape=Mrecord,label=\"", node->id); - print("{ %s | { L: %d | C: %d } } ", node_str[node->kind], - node->line, node->col); - switch (node->kind) { - case NODE_NUM_INT: print("| Value: %d", node->value.i); break; - case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; - case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; - case NODE_STRING: print("| Value: %s", node->value.str); break; - case NODE_SYMBOL_IDX: - case NODE_STRUCT: - case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; - case NODE_SYMBOL: - case NODE_TYPE: { - if (node->is_ptr) { - print("| Name: @%s", node->value.sym); - } else { - print("| Name: %s", node->value.sym); - } - } break; - default: break; - } - println("\"];"); - - switch (node->kind) { - case NODE_COND: - case NODE_MATCH: { - if (node->match_expr) { - println("%d:e->%d:w;", node->id, node->match_expr->id); - graph_node(node->match_expr); - } - for (sz i = 0; i < array_size(node->match_cases); i++) { - Node *next = node->match_cases[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_BLOCK: - case NODE_STRUCT_LIT: - case NODE_COMPOUND_TYPE: { - for (sz i = 0; i < array_size(node->elements); i++) { - Node *next = node->elements[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_ENUM: - case NODE_STRUCT: { - for (sz i = 0; i < array_size(node->struct_field); i++) { - Node *next = node->struct_field[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_IF: { - if (node->cond_if) { - println("%d:e->%d:w;", node->id, node->cond_if->id); - graph_node(node->cond_if); - } - if (node->cond_expr) { - println("%d:e->%d:w;", node->id, node->cond_expr->id); - graph_node(node->cond_expr); - } - if (node->cond_else) { - println("%d:e->%d:w;", node->id, node->cond_else->id); - graph_node(node->cond_else); - } - } break; - case NODE_VAL_FIELD: - case NODE_SET: - case NODE_LET: { - if (node->var_name) { - println("%d:e->%d:w;", node->id, node->var_name->id); - graph_node(node->var_name); - } - if (node->var_type) { - println("%d:e->%d:w;", node->id, node->var_type->id); - graph_node(node->var_type); - } - if (node->var_val) { - println("%d:e->%d:w;", node->id, node->var_val->id); - graph_node(node->var_val); - } - } break; - default: { - if (node->left) { - println("%d:e->%d:w;", node->id, node->left->id); - graph_node(node->left); - } - if (node->right) { - println("%d:e->%d:w;", node->id, node->right->id); - graph_node(node->right); - } - } break; - } -} - -void -graph_ast(Node **roots) { - if (roots == NULL) return; - println("digraph ast {"); - println("rankdir=LR;"); - println("ranksep=\"0.95 equally\";"); - println("nodesep=\"0.5 equally\";"); - println("overlap=scale;"); - println("bgcolor=\"transparent\";"); - for (sz i = 0; i < array_size(roots); ++i) { - println("subgraph %d {", i); - Node *root = roots[array_size(roots) - 1 - i]; - graph_node(root); - println("}"); - } - println("}"); -} - void process_file(Str path) { Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); diff --git a/src/parser.c b/src/parser.c index 3f15b47..84f3424 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,488 +1,970 @@ -#include "parser.h" -#include "darray.h" +#ifndef PARSER_C +#define PARSER_C + +#include "badlib.h" +#include "lexer.c" + +// +// Nodes +// + +typedef enum NodeKind { + // Arithmetic. + NODE_ADD, + NODE_SUB, + NODE_DIV, + NODE_MUL, + NODE_MOD, + // Logical. + NODE_NOT, + NODE_AND, + NODE_OR, + NODE_EQ, + NODE_NEQ, + NODE_LT, + NODE_GT, + NODE_LE, + NODE_GE, + // Bitwise ops. + NODE_BITNOT, + NODE_BITAND, + NODE_BITOR, + NODE_BITLSHIFT, + NODE_BITRSHIFT, + // Literals. + NODE_STRING, + NODE_SYMBOL, + NODE_NUM_INT, + NODE_NUM_UINT, + NODE_NUM_FLOAT, + NODE_TRUE, + NODE_FALSE, + NODE_NIL, + NODE_STRUCT_LIT, + // Keywords. + NODE_LET, + NODE_SET, + NODE_STRUCT, + NODE_IF, + NODE_MATCH, + NODE_CASE, + NODE_COND, + NODE_ENUM, + NODE_BREAK, + NODE_CONTINUE, + NODE_WHILE, + // Helpers. + NODE_SYMBOL_IDX, + NODE_TYPE, + NODE_ARR_TYPE, + NODE_COMPOUND_TYPE, + NODE_VAL_FIELD, + NODE_BLOCK, +} NodeKind; + +Str node_str[] = { + // Arithmetic. + [NODE_ADD] = cstr("ADD"), + [NODE_SUB] = cstr("SUB"), + [NODE_DIV] = cstr("DIV"), + [NODE_MUL] = cstr("MUL"), + [NODE_MOD] = cstr("MOD"), + // Logical. + [NODE_NOT] = cstr("NOT"), + [NODE_AND] = cstr("AND"), + [NODE_OR] = cstr("OR"), + [NODE_EQ] = cstr("EQ"), + [NODE_NEQ] = cstr("NEQ"), + [NODE_LT] = cstr("LT"), + [NODE_GT] = cstr("GT"), + [NODE_LE] = cstr("LE"), + [NODE_GE] = cstr("GE"), + // Bitwise ops. + [NODE_BITNOT] = cstr("BITNOT"), + [NODE_BITAND] = cstr("BITAND"), + [NODE_BITOR] = cstr("BITOR"), + [NODE_BITLSHIFT] = cstr("BITLSHIFT"), + [NODE_BITRSHIFT] = cstr("BITRSHIFT"), + // Literals. + [NODE_STRING] = cstr("STRING"), + [NODE_SYMBOL] = cstr("SYMBOL"), + [NODE_NUM_INT] = cstr("INT"), + [NODE_NUM_UINT] = cstr("UINT"), + [NODE_NUM_FLOAT] = cstr("FLOAT"), + [NODE_TRUE] = cstr("TRUE"), + [NODE_FALSE] = cstr("FALSE"), + [NODE_NIL] = cstr("NIL"), + [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), + // Keywords. + [NODE_LET] = cstr("LET"), + [NODE_SET] = cstr("SET"), + [NODE_STRUCT] = cstr("STRUCT DEF"), + [NODE_IF] = cstr("IF"), + [NODE_MATCH] = cstr("MATCH"), + [NODE_CASE] = cstr("CASE"), + [NODE_COND] = cstr("COND"), + [NODE_ENUM] = cstr("ENUM"), + [NODE_BREAK] = cstr("BREAK"), + [NODE_CONTINUE] = cstr("CONTINUE"), + [NODE_WHILE] = cstr("WHILE"), + // Helpers. + [NODE_TYPE] = cstr("TYPE"), + [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), + [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), + [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), + [NODE_VAL_FIELD] = cstr("FIELD"), + [NODE_BLOCK] = cstr("BLOCK"), +}; + +typedef struct Node { + sz id; + sz line; + sz col; + + NodeKind kind; + union { + f64 d; + sz i; + u64 u; + Str str; + Str sym; + } value; + union { + struct { + struct Node *left; + struct Node *right; + }; + struct { + struct Node *next; + struct Node *arr_size; + }; + struct { + struct Node *var_name; + struct Node *var_type; + struct Node *var_val; + }; + struct { + struct Node *while_cond; + struct Node *while_expr; + }; + struct { + struct Node *cond_if; + struct Node *cond_expr; + struct Node *cond_else; + }; + struct { + struct Node *field_name; + struct Node *field_type; + struct Node *field_val; + }; + struct { + struct Node *match_expr; + struct Node **match_cases; + }; + struct { + struct Node *case_value; + struct Node *case_expr; + }; + struct Node **struct_field; + struct Node **elements; + struct Node **statements; + }; + bool is_ptr; +} Node; + +// +// Parsing. +// + +typedef struct Parser { + Token current; + Token previous; + Token *tokens; + sz idx; + + // Error handling. + bool err; + bool panic; + Str file_name; + + // Storage. + Node **nodes; + Arena *storage; +} Parser; + +typedef enum { + PREC_NONE = 0, + PREC_LOW, // lowest precedence + PREC_BITLOGIC, // & | + PREC_BITSHIFT, // << >> + PREC_OR, // || + PREC_AND, // && + PREC_EQUALITY, // == != + PREC_COMPARISON, // < > <= >= + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, // ! - + PREC_CALL, // . () + PREC_PRIMARY // highest precedence +} ParsePrecedence; + +typedef void (*ParseFn)(Parser *); + +typedef struct { + ParseFn prefix; + ParseFn infix; + ParsePrecedence precedence; +} ParseRule; + +void parse_expr(Parser *parser, ParsePrecedence precedence); +void parse_advance(Parser *parser); +bool parse_match(Parser *parser, TokenKind kind); + +void parse_grouping(Parser *parser); +void parse_unary(Parser *parser); +void parse_binary(Parser *parser); +void parse_number(Parser *parser); +void parse_literal(Parser *parser); +void parse_string(Parser *parser); +void parse_symbol(Parser *parser); +void parse_keyword(Parser *parser); +void parse_type(Parser *parser); +void parse_block(Parser *parser); + +ParseRule parse_rules[] = { + [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, + [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, + [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, + + // Arithmetic. + [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, + [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, + [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, + [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, + [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, + + // Logical. + [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, + [TOK_AND] = {NULL, parse_binary, PREC_AND}, + [TOK_OR] = {NULL, parse_binary, PREC_OR}, + [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, + [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, + [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, + + // Bitwise ops. + [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, + [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, + [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, + [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, + [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, + + // Literals. + [TOK_STRING] = {parse_string, NULL, PREC_NONE}, + [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, + [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, + [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, + [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, + [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, + [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, + [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, + + // Statements. + [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, + [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, + [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, + [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, + [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, + [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, + [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, + [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, + [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, + [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, + [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, + [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, + + [TOK_EOF] = {NULL, NULL, PREC_NONE}, +}; -Token -next_token(Parser *parser) { - return parser->tokens[parser->current_token++]; +Node * +node_alloc(Parser *parser, NodeKind kind, Token tok) { + if (parser->panic) return NULL; + static sz id = 0; + Node *node = arena_calloc((sz)sizeof(Node), parser->storage); + node->id = id++; + node->kind = kind; + node->line = tok.line; + node->col = tok.col; + return node; } -Token -peek_token(Parser *parser) { - return parser->tokens[parser->current_token]; -} +void +parse_emit_err(Parser *parser, Token token, Str msg) { + if (parser->panic) return; + parser->panic = true; + parser->err = true; + eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, + msg); -bool -has_next(Parser *parser) { - return parser->current_token < array_size(parser->tokens); + if (token.kind == TOK_EOF) { + eprintln(" at end of the file"); + } else if (token.kind != TOK_UNKNOWN) { + eprintln(" at %s", token.val); + } } -bool -consume_rparen(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_RPAREN) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); - return false; +void +parse_advance(Parser *parser) { + assert(parser); + parser->previous = parser->current; + if (parser->previous.kind == TOK_EOF) { + return; } - return true; + parser->current = parser->tokens[parser->idx++]; } bool -consume_lparen(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_LPAREN) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col); +parse_match(Parser *parser, TokenKind kind) { + assert(parser); + if (parser->current.kind != kind) { return false; } + parse_advance(parser); return true; } -Node * -parse_number(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_NUMBER) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); - return NULL; - } - - bool negative = false; - int base = 10; - char c = sv_next(&tok.value); - if (c == '-') { - negative = true; - c = sv_next(&tok.value); - } - if (c == '+') { - c = sv_next(&tok.value); - } - if (c == '0' && sv_peek(&tok.value) != '\0') { - c = sv_next(&tok.value); - if (c == 'x') { - base = 16; - c = sv_next(&tok.value); - } else if (c == 'b') { - base = 2; - c = sv_next(&tok.value); - } else if (!(c >= '0' && c <= '9')){ - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - } - - // Integral part. - u64 integral = 0; - while (c != '\0') { - ssize_t current = 0; - if (c >= 'a' && c <= 'z' && base == 16) { - current = (c - 'a') + 10; - } else if (c >= 'A' && c <= 'Z' && base == 16) { - current = (c - 'A') + 10; - } else if (c >= '0' && c <= '9') { - current = (c - '0'); - } else if (c == '.') { - c = sv_next(&tok.value); - break; - } else { - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - integral = integral * base + current; - c = sv_next(&tok.value); - } - - // Fractional part. - u64 fractional = 0; - while (c != '\0') { - ssize_t current = 0; - if (c >= 'a' && c <= 'z' && base == 16) { - current = (c - 'a') + 10; - } else if (c >= 'A' && c <= 'Z' && base == 16) { - current = (c - 'A') + 10; - } else if (c >= '0' && c <= '9') { - current = (c - '0'); - } else { - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - fractional = fractional * base + current; - c = sv_next(&tok.value); +static void +parse_consume(Parser *parser, TokenKind kind, Str msg) { + if (parser->current.kind == kind) { + parse_advance(parser); + return; } - - Node * node = alloc_node(NODE_NUMBER); - node->number.negative = negative; - node->number.integral = integral; - node->number.fractional = fractional; - node->line = tok.line; - node->col = tok.col; - return node; + parse_emit_err(parser, parser->current, msg); } -Node * -parse_string(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_STRING) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_STRING); - node->string = tok.value; - node->line = tok.line; - node->col = tok.col; - return node; +void +parse_block(Parser *parser) { +#if DEBUG == 1 + print("parsing block "); + print_token(parser->previous); +#endif + Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); + if (!block) return; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + parse_expr(parser, PREC_LOW); + Node *next = array_pop(parser->nodes); + array_push(block->statements, next, parser->storage); + } + array_push(parser->nodes, block, parser->storage); } -Node * -parse_symbol(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_SYMBOL) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); - return NULL; +void +parse_expr(Parser *parser, ParsePrecedence precedence) { + parse_advance(parser); + // TODO: synchronize panic mode on keywords. + if (parser->panic) return; + ParseFn prefix = parse_rules[parser->previous.kind].prefix; + if (prefix == NULL) { + parse_emit_err(parser, parser->previous, cstr("expected expression")); + return; + } + prefix(parser); + + while (precedence <= parse_rules[parser->current.kind].precedence) { + parse_advance(parser); + ParseFn infix = parse_rules[parser->previous.kind].infix; + if (infix == NULL) { + parse_emit_err(parser, parser->previous, + cstr("expected expression")); + return; + } + infix(parser); } - Node *node = alloc_node(NODE_SYMBOL); - node->string = tok.value; - node->line = tok.line; - node->col = tok.col; - return node; } -Node * -parse_type(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_COLON) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_TYPE); - node->line = tok.line; - node->col = tok.col; - tok = next_token(parser); - if (tok.type != TOKEN_SYMBOL) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); - return NULL; - } - node->string = tok.value; - return node; +void +parse_unary(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing unary "); + print_token(prev); +#endif + parse_expr(parser, PREC_UNARY); + Node *node = NULL; + switch (prev.kind) { + case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; + case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; + default: break; // Unreachable. + } + if (!node) return; + node->left = array_pop(parser->nodes); + array_push(parser->nodes, node, parser->storage); } -Node * -parse_bool(Parser *parser) { - Token tok = next_token(parser); - if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_BOOL); - node->boolean = tok.type == TOKEN_TRUE; - node->line = tok.line; - node->col = tok.col; - return node; +void +parse_literal(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing literal "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; + case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; + case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; + default: return; // Unreachable. + } + if (!node) return; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_builtin(Parser *parser) { - Token tok = next_token(parser); - Node *node = alloc_node(NODE_BUILTIN); - node->builtin.type = tok.type; - node->line = tok.line; - node->col = tok.col; - array_init(node->builtin.args, 0); - while (has_next(parser)) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - return node; +void +parse_type(Parser *parser) { + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing type "); + print_token(prev); +#endif + Node *node = node_alloc(parser, NODE_TYPE, prev); + if (!node) return; + if (parse_match(parser, TOK_AT)) { + node->is_ptr = true; + } + if (parse_match(parser, TOK_LCURLY)) { + node->kind = NODE_COMPOUND_TYPE; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + parse_consume(parser, TOK_SYMBOL, cstr("expected name")); + parse_symbol(parser); + Node *next = array_pop(parser->nodes); + parse_consume(parser, TOK_COLON, cstr("incomplete type")); + parse_type(parser); + next->next = array_pop(parser->nodes); + array_push(node->elements, next, parser->storage); } - Node *arg = parse_next(parser); - if (arg == NULL) { - break; + } else { + parse_consume(parser, TOK_SYMBOL, + cstr("no type given for struct field")); + node->value.sym = parser->previous.val; + // Optional array value? + if (parse_match(parser, TOK_LSQUARE)) { + node->kind = NODE_ARR_TYPE, + parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); + parse_number(parser); + node->arr_size = array_pop(parser->nodes); + parse_consume(parser, TOK_RSQUARE, + cstr("unmatched brackets ']' in array type")); } - array_push(node->builtin.args, arg); } - push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); - return NULL; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_funcall(Parser *parser) { - Token tok = peek_token(parser); - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; - } - Node *node = alloc_node(NODE_FUNCALL); - node->funcall.name = name; - node->line = tok.line; - node->col = tok.col; - array_init(node->funcall.args, 0); - while (has_next(parser)) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - return node; - } - Node *arg = parse_next(parser); - if (arg == NULL) { - break; - } - array_push(node->funcall.args, arg); +void +parse_keyword(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing keyword "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_LET: { + node = node_alloc(parser, NODE_LET, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on let expression")); + parse_symbol(parser); + node->var_name = array_pop(parser->nodes); + if (node->var_name->next) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in let expression")); + return; + } + + // Optional type declaration. + if (parse_match(parser, TOK_COLON)) { + parse_type(parser); + node->var_type = array_pop(parser->nodes); + } + + // Optional assignment. + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + node->var_val = array_pop(parser->nodes); + } + } break; + case TOK_SET: { + node = node_alloc(parser, NODE_SET, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on let expression")); + parse_symbol(parser); + node->var_name = array_pop(parser->nodes); + parse_consume(parser, TOK_ASSIGN, + cstr("expected assignment on set expression")); + parse_expr(parser, PREC_LOW); + node->var_val = array_pop(parser->nodes); + } break; + case TOK_STRUCT: { + node = node_alloc(parser, NODE_STRUCT, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on struct definition")); + // Just consume this to avoid conflicts with struct literals. + node->value.sym = parser->previous.val; + + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on struct definition")); + + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = + node_alloc(parser, NODE_VAL_FIELD, parser->current); + if (!field) return; + parse_consume( + parser, TOK_SYMBOL, + cstr("expected symbol name on struct definition")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + if (field->field_name->next || + field->field_name->kind == NODE_SYMBOL_IDX) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in struct field")); + return; + } + parse_consume( + parser, TOK_COLON, + cstr("expected symbol name on struct definition")); + parse_type(parser); + field->field_type = array_pop(parser->nodes); + + // Optional assignment. + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + field->field_val = array_pop(parser->nodes); + } + + array_push(node->struct_field, field, parser->storage); + } + } break; + case TOK_IF: { + node = node_alloc(parser, NODE_IF, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on if expression")); + parse_expr(parser, PREC_LOW); + node->cond_if = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on if expression")); + parse_expr(parser, PREC_LOW); + node->cond_expr = array_pop(parser->nodes); + if (parse_match(parser, TOK_ELSE)) { + parse_expr(parser, PREC_LOW); + node->cond_else = array_pop(parser->nodes); + } + } break; + case TOK_MATCH: { + node = node_alloc(parser, NODE_MATCH, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on if expression")); + parse_expr(parser, PREC_LOW); + node->match_expr = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on if expression")); + parse_consume(parser, TOK_LCURLY, + cstr("expected block of match cases")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); + if (!tmp) return; + // Are we on the default case. + if (!parse_match(parser, TOK_ELSE)) { + parse_consume(parser, TOK_CASE, + cstr("expected case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_value = array_pop(parser->nodes); + } + parse_consume(parser, TOK_ASSIGN, + cstr("malformed case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_expr = array_pop(parser->nodes); + array_push(node->match_cases, tmp, parser->storage); + } + // TODO: Check that we only have literals on the match case, this + // could be done on the analysis step, but also here... + // TODO: Check that there are no multiple default or duplicated + // cases. + } break; + case TOK_ENUM: { + node = node_alloc(parser, NODE_ENUM, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on enum definition")); + // Just consume this to avoid conflicts with struct literals. + node->value.sym = parser->previous.val; + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on enum definition")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = + node_alloc(parser, NODE_VAL_FIELD, parser->current); + if (!field) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on enum definition")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + if (field->field_name->next || + field->field_name->kind == NODE_SYMBOL_IDX) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in enum field")); + return; + } + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + field->field_val = array_pop(parser->nodes); + } + array_push(node->struct_field, field, parser->storage); + } + } break; + case TOK_COND: { + node = node_alloc(parser, NODE_COND, prev); + if (!node) return; + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on cond expression")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); + if (!tmp) return; + // Are we on the default case. + if (!parse_match(parser, TOK_ELSE)) { + parse_consume(parser, TOK_CASE, + cstr("expected case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_value = array_pop(parser->nodes); + } + parse_consume(parser, TOK_ASSIGN, + cstr("malformed case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_expr = array_pop(parser->nodes); + array_push(node->match_cases, tmp, parser->storage); + } + } break; + case TOK_BREAK: { + node = node_alloc(parser, NODE_BREAK, prev); + if (!node) return; + } break; + case TOK_CONTINUE: { + node = node_alloc(parser, NODE_CONTINUE, prev); + if (!node) return; + } break; + case TOK_WHILE: { + node = node_alloc(parser, NODE_WHILE, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on while expression")); + parse_expr(parser, PREC_LOW); + node->while_cond = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on while expression")); + parse_expr(parser, PREC_LOW); + node->while_expr = array_pop(parser->nodes); + } break; + default: return; // Unreachable. } - push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); - return NULL; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_def(Parser *parser) { - Token tok = next_token(parser); - - Node *symbol = parse_symbol(parser); - if (symbol == NULL) { - return NULL; - } - - // TODO: Making type definitions mandatory for now until we introduce type - // inference. - Node *type = parse_type(parser); - if (type == NULL) { - return NULL; - } - - Node *value = parse_next(parser); - if (value == NULL) { - return NULL; - } - - if (!consume_rparen(parser)) { - return NULL; +void +parse_binary(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing binary "); + print_token(prev); +#endif + ParseRule rule = parse_rules[prev.kind]; + parse_expr(parser, rule.precedence + 1); + + Node *node; + switch (prev.kind) { + case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; + case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; + case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; + case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; + case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; + case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; + case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; + case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; + case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; + case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; + case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; + case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; + case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; + case TOK_BITAND: { + node = node_alloc(parser, NODE_BITAND, prev); + } break; + case TOK_BITOR: { + node = node_alloc(parser, NODE_BITOR, prev); + } break; + case TOK_BITLSHIFT: { + node = node_alloc(parser, NODE_BITLSHIFT, prev); + } break; + case TOK_BITRSHIFT: { + node = node_alloc(parser, NODE_BITRSHIFT, prev); + } break; + default: { + parse_emit_err(parser, prev, cstr("unreachable")); + return; + } } - - Node *node = alloc_node(NODE_DEF); - node->def.symbol = symbol; - node->def.value = value; - node->def.type = type; - node->line = tok.line; - node->col = tok.col; - return node; + if (!node) return; + node->right = array_pop(parser->nodes); + node->left = array_pop(parser->nodes); + array_push(parser->nodes, node, parser->storage); } -Node * -parse_set(Parser *parser) { - Token tok = next_token(parser); - - Node *symbol = parse_symbol(parser); - if (symbol == NULL) { - return NULL; - } - - Node *value = parse_next(parser); - if (value == NULL) { - return NULL; - } - - if (!consume_rparen(parser)) { - return NULL; +void +parse_number(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing number "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_NUM_INT: { + if (str_has_prefix(prev.val, cstr("0x")) || + str_has_prefix(prev.val, cstr("0b"))) { + node = node_alloc(parser, NODE_NUM_UINT, prev); + if (!node) return; + node->value.u = str_to_uint(prev.val); + } else { + node = node_alloc(parser, NODE_NUM_INT, prev); + if (!node) return; + node->value.i = str_to_int(prev.val); + } + } break; + case TOK_NUM_FLOAT: { + node = node_alloc(parser, NODE_NUM_FLOAT, prev); + if (!node) return; + node->value.d = str_to_float(prev.val); + } break; + case TOK_CHAR: { + node = node_alloc(parser, NODE_NUM_INT, prev); + if (!node) return; + node->value.i = prev.val.mem[1]; + } break; + default: break; } - - Node *node = alloc_node(NODE_SET); - node->set.symbol = symbol; - node->set.value = value; - node->line = tok.line; - node->col = tok.col; - return node; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_fun(Parser *parser) { - Token tok = next_token(parser); - - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; - } - - Node *node = alloc_node(NODE_FUN); - node->fun.name = name; - array_init(node->fun.param_names, 0); - array_init(node->fun.param_types, 0); - node->line = tok.line; - node->col = tok.col; +void +parse_string(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing string "); + print_token(prev); +#endif + Node *node = node_alloc(parser, NODE_STRING, prev); + if (!node) return; + node->value.str = str_remove_prefix(prev.val, cstr("\"")); + node->value.str = str_remove_suffix(node->value.str, cstr("\"")); + array_push(parser->nodes, node, parser->storage); +} - // Parse parameter list and return type. - if (!consume_lparen(parser)) { - return NULL; - } - while (true) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - break; +void +parse_symbol(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing symbol "); + print_token(prev); +#endif + if (prev.kind == TOK_AT) { + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + Node *node = array_pop(parser->nodes); + if (node) { + node->is_ptr = true; + array_push(parser->nodes, node, parser->storage); } - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; + return; + } + Node *node = node_alloc(parser, NODE_SYMBOL, prev); + if (!node) return; + if (parse_match(parser, TOK_DOT)) { + // Symbol chain. + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + node->next = array_pop(parser->nodes); + } else if (parse_match(parser, TOK_LCURLY)) { + // Struct literal. + node->kind = NODE_STRUCT_LIT; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); + parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); + parse_expr(parser, PREC_LOW); + field->field_type = array_pop(parser->nodes); + array_push(node->elements, field, parser->storage); } - Node *type = parse_type(parser); - if (type == NULL) { - return NULL; + } else if (parse_match(parser, TOK_LSQUARE)) { + node->kind = NODE_SYMBOL_IDX; + parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); + parse_number(parser); + node->arr_size = array_pop(parser->nodes); + parse_consume(parser, TOK_RSQUARE, + cstr("unmatched brackets ']' in array type")); + if (parse_match(parser, TOK_DOT)) { + // Symbol chain. + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + node->next = array_pop(parser->nodes); } - array_push(node->fun.param_names, name); - array_push(node->fun.param_types, type); - } - Node *ret_type = parse_type(parser); - if (ret_type == NULL) { - return NULL; - } - node->fun.return_type = ret_type; - - Node *body = parse_next(parser); - if (body == NULL) { - return NULL; - } - node->fun.body = body; - if (!consume_rparen(parser)) { - return NULL; + } else { + node = node_alloc(parser, NODE_SYMBOL, prev); + if (!node) return; } - - return node; + node->value.sym = prev.val; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_if(Parser *parser) { - Token tok = next_token(parser); - - Node *node = alloc_node(NODE_IF); - node->ifexpr.cond = NULL; - node->ifexpr.expr_true = NULL; - node->ifexpr.expr_false = NULL; - node->line = tok.line; - node->col = tok.col; - - Node *cond = parse_next(parser); - if (cond == NULL) { - return NULL; - } - Node *expr_true = parse_next(parser); - if (expr_true == NULL) { - return NULL; - } - node->ifexpr.cond = cond; - node->ifexpr.expr_true = expr_true; - tok = peek_token(parser); - - // Optional else statement. - if (tok.type != TOKEN_RPAREN) { - Node *expr_false = parse_next(parser); - if (expr_false == NULL) { - return NULL; - } - node->ifexpr.expr_false = expr_false; - } - - if (!consume_rparen(parser)) { - return NULL; - } - - return node; +void +parse_grouping(Parser *parser) { + if (parser->panic) return; +#if DEBUG == 1 + print("parsing group "); + print_token(parser->previous); +#endif + parse_expr(parser, PREC_LOW); + parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); } -Node * -parse_paren(Parser *parser) { - next_token(parser); // Skip paren. - - Token tok = peek_token(parser); - - switch (tok.type) { - // Builtin functions. - case TOKEN_ADD: - case TOKEN_SUB: - case TOKEN_MUL: - case TOKEN_DIV: - case TOKEN_MOD: - case TOKEN_NOT: - case TOKEN_AND: - case TOKEN_OR: - case TOKEN_EQ: - case TOKEN_LT: - case TOKEN_GT: - case TOKEN_LE: - case TOKEN_GE: { return parse_builtin(parser); } break; - // Special functions. - case TOKEN_DEF: { return parse_def(parser); } break; - case TOKEN_SET: { return parse_set(parser); } break; - case TOKEN_FUN: { return parse_fun(parser); } break; - case TOKEN_IF: { return parse_if(parser); } break; +void +graph_node(Node *node) { + if (node == NULL) return; + print("%d [width=2.5,shape=Mrecord,label=\"", node->id); + print("{ %s | { L: %d | C: %d } } ", node_str[node->kind], + node->line, node->col); + switch (node->kind) { + case NODE_NUM_INT: print("| Value: %d", node->value.i); break; + case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; + case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; + case NODE_STRING: print("| Value: %s", node->value.str); break; + case NODE_SYMBOL_IDX: + case NODE_STRUCT: + case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; + case NODE_SYMBOL: + case NODE_TYPE: { + if (node->is_ptr) { + print("| Name: @%s", node->value.sym); + } else { + print("| Name: %s", node->value.sym); + } + } break; default: break; } - - return parse_funcall(parser); -} - -Node * -parse_block(Parser *parser) { - Token tok = next_token(parser); - - Node *node = alloc_node(NODE_BLOCK); - array_init(node->block.expr, 0); - node->line = tok.line; - node->col = tok.col; - while (true) { - Token next = peek_token(parser); - if (next.type == TOKEN_RCURLY) { - next_token(parser); - break; - } - Node *expr = parse_next(parser); - if (expr == NULL) { - return NULL; - } - array_push(node->block.expr, expr); - } - return node; -} - -Node * -parse_next(Parser *parser) { - Token tok = peek_token(parser); - switch (tok.type) { - case TOKEN_NUMBER: { return parse_number(parser); } break; - case TOKEN_STRING: { return parse_string(parser); } break; - case TOKEN_SYMBOL: { return parse_symbol(parser); } break; - case TOKEN_TRUE: - case TOKEN_FALSE: { return parse_bool(parser); } break; - case TOKEN_LPAREN: { return parse_paren(parser); } break; - case TOKEN_EOF: { return NULL; } break; - case TOKEN_LCURLY: { return parse_block(parser); } break; + println("\"];"); + + switch (node->kind) { + case NODE_COND: + case NODE_MATCH: { + if (node->match_expr) { + println("%d:e->%d:w;", node->id, node->match_expr->id); + graph_node(node->match_expr); + } + for (sz i = 0; i < array_size(node->match_cases); i++) { + Node *next = node->match_cases[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_BLOCK: + case NODE_STRUCT_LIT: + case NODE_COMPOUND_TYPE: { + for (sz i = 0; i < array_size(node->elements); i++) { + Node *next = node->elements[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_ENUM: + case NODE_STRUCT: { + for (sz i = 0; i < array_size(node->struct_field); i++) { + Node *next = node->struct_field[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_IF: { + if (node->cond_if) { + println("%d:e->%d:w;", node->id, node->cond_if->id); + graph_node(node->cond_if); + } + if (node->cond_expr) { + println("%d:e->%d:w;", node->id, node->cond_expr->id); + graph_node(node->cond_expr); + } + if (node->cond_else) { + println("%d:e->%d:w;", node->id, node->cond_else->id); + graph_node(node->cond_else); + } + } break; + case NODE_VAL_FIELD: + case NODE_SET: + case NODE_LET: { + if (node->var_name) { + println("%d:e->%d:w;", node->id, node->var_name->id); + graph_node(node->var_name); + } + if (node->var_type) { + println("%d:e->%d:w;", node->id, node->var_type->id); + graph_node(node->var_type); + } + if (node->var_val) { + println("%d:e->%d:w;", node->id, node->var_val->id); + graph_node(node->var_val); + } + } break; default: { - push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); - return NULL; + if (node->left) { + println("%d:e->%d:w;", node->id, node->left->id); + graph_node(node->left); + } + if (node->right) { + println("%d:e->%d:w;", node->id, node->right->id); + graph_node(node->right); + } } break; } } -bool -parse_roots(Parser *parser) { - while (has_next(parser)) { - Token tok = peek_token(parser); - if (tok.type == TOKEN_EOF) { - break; - } - Node *node = parse_next(parser); - if (node == NULL) { - return false; - } - array_push(parser->roots, node); - } - return true; +void +graph_ast(Node **roots) { + if (roots == NULL) return; + println("digraph ast {"); + println("rankdir=LR;"); + println("ranksep=\"0.95 equally\";"); + println("nodesep=\"0.5 equally\";"); + println("overlap=scale;"); + println("bgcolor=\"transparent\";"); + for (sz i = 0; i < array_size(roots); ++i) { + println("subgraph %d {", i); + Node *root = roots[array_size(roots) - 1 - i]; + graph_node(root); + println("}"); + } + println("}"); } -Root * -parse(Token *tokens) { - Parser parser = { - .tokens = tokens, - .current_token = 0, - }; - array_init(parser.roots, 0); - - if (!parse_roots(&parser)) { - return NULL; - } - return parser.roots; -} +#endif // PARSER_C diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index 206ca4c..0000000 --- a/src/parser.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef BDL_PARSER_H -#define BDL_PARSER_H - -#include "lexer.h" -#include "nodes.h" - -typedef Node* Root; - -typedef struct Parser { - Token *tokens; - size_t current_token; - Root *roots; -} Parser; - -Root * parse(Token *tokens); -Node * parse_next(Parser *parser); - -#endif // BDL_PARSER_H -- cgit v1.2.1