From 27e6bf016e87702d30ebb7c0716ff3e4f08a8823 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Jun 2024 08:51:35 +0200 Subject: Move parser to its own file --- src/main.c | 978 +------------------------------------------------------------ 1 file changed, 1 insertion(+), 977 deletions(-) (limited to 'src/main.c') diff --git a/src/main.c b/src/main.c index d001d9c..66142bd 100644 --- a/src/main.c +++ b/src/main.c @@ -4,6 +4,7 @@ #include "badlib.h" #include "lexer.c" +#include "parser.c" typedef enum ExecMode { RUN_NORMAL, @@ -20,983 +21,6 @@ init(void) { log_init_default(); } -void -print_token(Token tok) { - println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); -} - -void -print_tokens(Str path, Token *tokens) { - for (sz i = 0; i < array_size(tokens); i++) { - Token tok = tokens[i]; - print("%s:", path); - print_token(tok); - } -} - -// -// Nodes -// - -typedef enum NodeKind { - // Arithmetic. - NODE_ADD, - NODE_SUB, - NODE_DIV, - NODE_MUL, - NODE_MOD, - // Logical. - NODE_NOT, - NODE_AND, - NODE_OR, - NODE_EQ, - NODE_NEQ, - NODE_LT, - NODE_GT, - NODE_LE, - NODE_GE, - // Bitwise ops. - NODE_BITNOT, - NODE_BITAND, - NODE_BITOR, - NODE_BITLSHIFT, - NODE_BITRSHIFT, - // Literals. - NODE_STRING, - NODE_SYMBOL, - NODE_NUM_INT, - NODE_NUM_UINT, - NODE_NUM_FLOAT, - NODE_TRUE, - NODE_FALSE, - NODE_NIL, - NODE_STRUCT_LIT, - // Keywords. - NODE_LET, - NODE_SET, - NODE_STRUCT, - NODE_IF, - NODE_MATCH, - NODE_CASE, - NODE_COND, - NODE_ENUM, - NODE_BREAK, - NODE_CONTINUE, - NODE_WHILE, - // Helpers. - NODE_SYMBOL_IDX, - NODE_TYPE, - NODE_ARR_TYPE, - NODE_COMPOUND_TYPE, - NODE_VAL_FIELD, - NODE_BLOCK, -} NodeKind; - -Str node_str[] = { - // Arithmetic. - [NODE_ADD] = cstr("ADD"), - [NODE_SUB] = cstr("SUB"), - [NODE_DIV] = cstr("DIV"), - [NODE_MUL] = cstr("MUL"), - [NODE_MOD] = cstr("MOD"), - // Logical. - [NODE_NOT] = cstr("NOT"), - [NODE_AND] = cstr("AND"), - [NODE_OR] = cstr("OR"), - [NODE_EQ] = cstr("EQ"), - [NODE_NEQ] = cstr("NEQ"), - [NODE_LT] = cstr("LT"), - [NODE_GT] = cstr("GT"), - [NODE_LE] = cstr("LE"), - [NODE_GE] = cstr("GE"), - // Bitwise ops. - [NODE_BITNOT] = cstr("BITNOT"), - [NODE_BITAND] = cstr("BITAND"), - [NODE_BITOR] = cstr("BITOR"), - [NODE_BITLSHIFT] = cstr("BITLSHIFT"), - [NODE_BITRSHIFT] = cstr("BITRSHIFT"), - // Literals. - [NODE_STRING] = cstr("STRING"), - [NODE_SYMBOL] = cstr("SYMBOL"), - [NODE_NUM_INT] = cstr("INT"), - [NODE_NUM_UINT] = cstr("UINT"), - [NODE_NUM_FLOAT] = cstr("FLOAT"), - [NODE_TRUE] = cstr("TRUE"), - [NODE_FALSE] = cstr("FALSE"), - [NODE_NIL] = cstr("NIL"), - [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), - // Keywords. - [NODE_LET] = cstr("LET"), - [NODE_SET] = cstr("SET"), - [NODE_STRUCT] = cstr("STRUCT DEF"), - [NODE_IF] = cstr("IF"), - [NODE_MATCH] = cstr("MATCH"), - [NODE_CASE] = cstr("CASE"), - [NODE_COND] = cstr("COND"), - [NODE_ENUM] = cstr("ENUM"), - [NODE_BREAK] = cstr("BREAK"), - [NODE_CONTINUE] = cstr("CONTINUE"), - [NODE_WHILE] = cstr("WHILE"), - // Helpers. - [NODE_TYPE] = cstr("TYPE"), - [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), - [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), - [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), - [NODE_VAL_FIELD] = cstr("FIELD"), - [NODE_BLOCK] = cstr("BLOCK"), -}; - -typedef struct Node { - sz id; - sz line; - sz col; - - NodeKind kind; - union { - f64 d; - sz i; - u64 u; - Str str; - Str sym; - } value; - union { - struct { - struct Node *left; - struct Node *right; - }; - struct { - struct Node *next; - struct Node *arr_size; - }; - struct { - struct Node *var_name; - struct Node *var_type; - struct Node *var_val; - }; - struct { - struct Node *while_cond; - struct Node *while_expr; - }; - struct { - struct Node *cond_if; - struct Node *cond_expr; - struct Node *cond_else; - }; - struct { - struct Node *field_name; - struct Node *field_type; - struct Node *field_val; - }; - struct { - struct Node *match_expr; - struct Node **match_cases; - }; - struct { - struct Node *case_value; - struct Node *case_expr; - }; - struct Node **struct_field; - struct Node **elements; - struct Node **statements; - }; - bool is_ptr; -} Node; - -// -// Parsing. -// - -typedef struct Parser { - Token current; - Token previous; - Token *tokens; - sz idx; - - // Error handling. - bool err; - bool panic; - Str file_name; - - // Storage. - Node **nodes; - Arena *storage; -} Parser; - -typedef enum { - PREC_NONE = 0, - PREC_LOW, // lowest precedence - PREC_BITLOGIC, // & | - PREC_BITSHIFT, // << >> - PREC_OR, // || - PREC_AND, // && - PREC_EQUALITY, // == != - PREC_COMPARISON, // < > <= >= - PREC_TERM, // + - - PREC_FACTOR, // * / % - PREC_UNARY, // ! - - PREC_CALL, // . () - PREC_PRIMARY // highest precedence -} ParsePrecedence; - -typedef void (*ParseFn)(Parser *); - -typedef struct { - ParseFn prefix; - ParseFn infix; - ParsePrecedence precedence; -} ParseRule; - -void parse_expr(Parser *parser, ParsePrecedence precedence); -void parse_advance(Parser *parser); -bool parse_match(Parser *parser, TokenKind kind); - -void parse_grouping(Parser *parser); -void parse_unary(Parser *parser); -void parse_binary(Parser *parser); -void parse_number(Parser *parser); -void parse_literal(Parser *parser); -void parse_string(Parser *parser); -void parse_symbol(Parser *parser); -void parse_keyword(Parser *parser); -void parse_type(Parser *parser); -void parse_block(Parser *parser); - -ParseRule parse_rules[] = { - [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, - [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, - [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, - - // Arithmetic. - [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, - [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, - [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, - [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, - [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, - - // Logical. - [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, - [TOK_AND] = {NULL, parse_binary, PREC_AND}, - [TOK_OR] = {NULL, parse_binary, PREC_OR}, - [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, - [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, - [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, - [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, - - // Bitwise ops. - [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, - [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, - [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, - [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, - [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, - - // Literals. - [TOK_STRING] = {parse_string, NULL, PREC_NONE}, - [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, - [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, - [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, - [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, - [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, - [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, - [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, - - // Statements. - [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, - [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, - [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, - [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, - [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, - [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, - [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, - [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, - [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, - [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, - [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, - [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, - - [TOK_EOF] = {NULL, NULL, PREC_NONE}, -}; - -Node * -node_alloc(Parser *parser, NodeKind kind, Token tok) { - if (parser->panic) return NULL; - static sz id = 0; - Node *node = arena_calloc((sz)sizeof(Node), parser->storage); - node->id = id++; - node->kind = kind; - node->line = tok.line; - node->col = tok.col; - return node; -} - -void -parse_emit_err(Parser *parser, Token token, Str msg) { - if (parser->panic) return; - parser->panic = true; - parser->err = true; - eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, - msg); - - if (token.kind == TOK_EOF) { - eprintln(" at end of the file"); - } else if (token.kind != TOK_UNKNOWN) { - eprintln(" at %s", token.val); - } -} - -void -parse_advance(Parser *parser) { - assert(parser); - parser->previous = parser->current; - if (parser->previous.kind == TOK_EOF) { - return; - } - parser->current = parser->tokens[parser->idx++]; -} - -bool -parse_match(Parser *parser, TokenKind kind) { - assert(parser); - if (parser->current.kind != kind) { - return false; - } - parse_advance(parser); - return true; -} - -static void -parse_consume(Parser *parser, TokenKind kind, Str msg) { - if (parser->current.kind == kind) { - parse_advance(parser); - return; - } - parse_emit_err(parser, parser->current, msg); -} - -void -parse_block(Parser *parser) { -#if DEBUG == 1 - print("parsing block "); - print_token(parser->previous); -#endif - Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); - if (!block) return; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - parse_expr(parser, PREC_LOW); - Node *next = array_pop(parser->nodes); - array_push(block->statements, next, parser->storage); - } - array_push(parser->nodes, block, parser->storage); -} - -void -parse_expr(Parser *parser, ParsePrecedence precedence) { - parse_advance(parser); - // TODO: synchronize panic mode on keywords. - if (parser->panic) return; - ParseFn prefix = parse_rules[parser->previous.kind].prefix; - if (prefix == NULL) { - parse_emit_err(parser, parser->previous, cstr("expected expression")); - return; - } - prefix(parser); - - while (precedence <= parse_rules[parser->current.kind].precedence) { - parse_advance(parser); - ParseFn infix = parse_rules[parser->previous.kind].infix; - if (infix == NULL) { - parse_emit_err(parser, parser->previous, - cstr("expected expression")); - return; - } - infix(parser); - } -} - -void -parse_unary(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing unary "); - print_token(prev); -#endif - parse_expr(parser, PREC_UNARY); - Node *node = NULL; - switch (prev.kind) { - case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; - case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; - default: break; // Unreachable. - } - if (!node) return; - node->left = array_pop(parser->nodes); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_literal(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing literal "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; - case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; - case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; - default: return; // Unreachable. - } - if (!node) return; - array_push(parser->nodes, node, parser->storage); -} - -void -parse_type(Parser *parser) { - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing type "); - print_token(prev); -#endif - Node *node = node_alloc(parser, NODE_TYPE, prev); - if (!node) return; - if (parse_match(parser, TOK_AT)) { - node->is_ptr = true; - } - if (parse_match(parser, TOK_LCURLY)) { - node->kind = NODE_COMPOUND_TYPE; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - parse_consume(parser, TOK_SYMBOL, cstr("expected name")); - parse_symbol(parser); - Node *next = array_pop(parser->nodes); - parse_consume(parser, TOK_COLON, cstr("incomplete type")); - parse_type(parser); - next->next = array_pop(parser->nodes); - array_push(node->elements, next, parser->storage); - } - } else { - parse_consume(parser, TOK_SYMBOL, - cstr("no type given for struct field")); - node->value.sym = parser->previous.val; - // Optional array value? - if (parse_match(parser, TOK_LSQUARE)) { - node->kind = NODE_ARR_TYPE, - parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); - parse_number(parser); - node->arr_size = array_pop(parser->nodes); - parse_consume(parser, TOK_RSQUARE, - cstr("unmatched brackets ']' in array type")); - } - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_keyword(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing keyword "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_LET: { - node = node_alloc(parser, NODE_LET, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on let expression")); - parse_symbol(parser); - node->var_name = array_pop(parser->nodes); - if (node->var_name->next) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in let expression")); - return; - } - - // Optional type declaration. - if (parse_match(parser, TOK_COLON)) { - parse_type(parser); - node->var_type = array_pop(parser->nodes); - } - - // Optional assignment. - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - node->var_val = array_pop(parser->nodes); - } - } break; - case TOK_SET: { - node = node_alloc(parser, NODE_SET, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on let expression")); - parse_symbol(parser); - node->var_name = array_pop(parser->nodes); - parse_consume(parser, TOK_ASSIGN, - cstr("expected assignment on set expression")); - parse_expr(parser, PREC_LOW); - node->var_val = array_pop(parser->nodes); - } break; - case TOK_STRUCT: { - node = node_alloc(parser, NODE_STRUCT, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on struct definition")); - // Just consume this to avoid conflicts with struct literals. - node->value.sym = parser->previous.val; - - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on struct definition")); - - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = - node_alloc(parser, NODE_VAL_FIELD, parser->current); - if (!field) return; - parse_consume( - parser, TOK_SYMBOL, - cstr("expected symbol name on struct definition")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - if (field->field_name->next || - field->field_name->kind == NODE_SYMBOL_IDX) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in struct field")); - return; - } - parse_consume( - parser, TOK_COLON, - cstr("expected symbol name on struct definition")); - parse_type(parser); - field->field_type = array_pop(parser->nodes); - - // Optional assignment. - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - field->field_val = array_pop(parser->nodes); - } - - array_push(node->struct_field, field, parser->storage); - } - } break; - case TOK_IF: { - node = node_alloc(parser, NODE_IF, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on if expression")); - parse_expr(parser, PREC_LOW); - node->cond_if = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on if expression")); - parse_expr(parser, PREC_LOW); - node->cond_expr = array_pop(parser->nodes); - if (parse_match(parser, TOK_ELSE)) { - parse_expr(parser, PREC_LOW); - node->cond_else = array_pop(parser->nodes); - } - } break; - case TOK_MATCH: { - node = node_alloc(parser, NODE_MATCH, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on if expression")); - parse_expr(parser, PREC_LOW); - node->match_expr = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on if expression")); - parse_consume(parser, TOK_LCURLY, - cstr("expected block of match cases")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); - if (!tmp) return; - // Are we on the default case. - if (!parse_match(parser, TOK_ELSE)) { - parse_consume(parser, TOK_CASE, - cstr("expected case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_value = array_pop(parser->nodes); - } - parse_consume(parser, TOK_ASSIGN, - cstr("malformed case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_expr = array_pop(parser->nodes); - array_push(node->match_cases, tmp, parser->storage); - } - // TODO: Check that we only have literals on the match case, this - // could be done on the analysis step, but also here... - // TODO: Check that there are no multiple default or duplicated - // cases. - } break; - case TOK_ENUM: { - node = node_alloc(parser, NODE_ENUM, prev); - if (!node) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on enum definition")); - // Just consume this to avoid conflicts with struct literals. - node->value.sym = parser->previous.val; - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on enum definition")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = - node_alloc(parser, NODE_VAL_FIELD, parser->current); - if (!field) return; - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol name on enum definition")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - if (field->field_name->next || - field->field_name->kind == NODE_SYMBOL_IDX) { - parse_emit_err(parser, prev, - cstr("invalid symbol name in enum field")); - return; - } - if (parse_match(parser, TOK_ASSIGN)) { - parse_expr(parser, PREC_LOW); - field->field_val = array_pop(parser->nodes); - } - array_push(node->struct_field, field, parser->storage); - } - } break; - case TOK_COND: { - node = node_alloc(parser, NODE_COND, prev); - if (!node) return; - parse_consume(parser, TOK_LCURLY, - cstr("expected '{' on cond expression")); - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); - if (!tmp) return; - // Are we on the default case. - if (!parse_match(parser, TOK_ELSE)) { - parse_consume(parser, TOK_CASE, - cstr("expected case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_value = array_pop(parser->nodes); - } - parse_consume(parser, TOK_ASSIGN, - cstr("malformed case statement")); - parse_expr(parser, PREC_LOW); - tmp->case_expr = array_pop(parser->nodes); - array_push(node->match_cases, tmp, parser->storage); - } - } break; - case TOK_BREAK: { - node = node_alloc(parser, NODE_BREAK, prev); - if (!node) return; - } break; - case TOK_CONTINUE: { - node = node_alloc(parser, NODE_CONTINUE, prev); - if (!node) return; - } break; - case TOK_WHILE: { - node = node_alloc(parser, NODE_WHILE, prev); - if (!node) return; - parse_consume(parser, TOK_LPAREN, - cstr("expected '(' on while expression")); - parse_expr(parser, PREC_LOW); - node->while_cond = array_pop(parser->nodes); - parse_consume(parser, TOK_RPAREN, - cstr("expected ')' on while expression")); - parse_expr(parser, PREC_LOW); - node->while_expr = array_pop(parser->nodes); - } break; - default: return; // Unreachable. - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_binary(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing binary "); - print_token(prev); -#endif - ParseRule rule = parse_rules[prev.kind]; - parse_expr(parser, rule.precedence + 1); - - Node *node; - switch (prev.kind) { - case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; - case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; - case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; - case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; - case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; - case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; - case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; - case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; - case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; - case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; - case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; - case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; - case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; - case TOK_BITAND: { - node = node_alloc(parser, NODE_BITAND, prev); - } break; - case TOK_BITOR: { - node = node_alloc(parser, NODE_BITOR, prev); - } break; - case TOK_BITLSHIFT: { - node = node_alloc(parser, NODE_BITLSHIFT, prev); - } break; - case TOK_BITRSHIFT: { - node = node_alloc(parser, NODE_BITRSHIFT, prev); - } break; - default: { - parse_emit_err(parser, prev, cstr("unreachable")); - return; - } - } - if (!node) return; - node->right = array_pop(parser->nodes); - node->left = array_pop(parser->nodes); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_number(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing number "); - print_token(prev); -#endif - Node *node = NULL; - switch (prev.kind) { - case TOK_NUM_INT: { - if (str_has_prefix(prev.val, cstr("0x")) || - str_has_prefix(prev.val, cstr("0b"))) { - node = node_alloc(parser, NODE_NUM_UINT, prev); - if (!node) return; - node->value.u = str_to_uint(prev.val); - } else { - node = node_alloc(parser, NODE_NUM_INT, prev); - if (!node) return; - node->value.i = str_to_int(prev.val); - } - } break; - case TOK_NUM_FLOAT: { - node = node_alloc(parser, NODE_NUM_FLOAT, prev); - if (!node) return; - node->value.d = str_to_float(prev.val); - } break; - case TOK_CHAR: { - node = node_alloc(parser, NODE_NUM_INT, prev); - if (!node) return; - node->value.i = prev.val.mem[1]; - } break; - default: break; - } - array_push(parser->nodes, node, parser->storage); -} - -void -parse_string(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing string "); - print_token(prev); -#endif - Node *node = node_alloc(parser, NODE_STRING, prev); - if (!node) return; - node->value.str = str_remove_prefix(prev.val, cstr("\"")); - node->value.str = str_remove_suffix(node->value.str, cstr("\"")); - array_push(parser->nodes, node, parser->storage); -} - -void -parse_symbol(Parser *parser) { - if (parser->panic) return; - Token prev = parser->previous; -#if DEBUG == 1 - print("parsing symbol "); - print_token(prev); -#endif - if (prev.kind == TOK_AT) { - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - Node *node = array_pop(parser->nodes); - if (node) { - node->is_ptr = true; - array_push(parser->nodes, node, parser->storage); - } - return; - } - Node *node = node_alloc(parser, NODE_SYMBOL, prev); - if (!node) return; - if (parse_match(parser, TOK_DOT)) { - // Symbol chain. - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - node->next = array_pop(parser->nodes); - } else if (parse_match(parser, TOK_LCURLY)) { - // Struct literal. - node->kind = NODE_STRUCT_LIT; - while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { - Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); - parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); - parse_symbol(parser); - field->field_name = array_pop(parser->nodes); - parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); - parse_expr(parser, PREC_LOW); - field->field_type = array_pop(parser->nodes); - array_push(node->elements, field, parser->storage); - } - } else if (parse_match(parser, TOK_LSQUARE)) { - node->kind = NODE_SYMBOL_IDX; - parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); - parse_number(parser); - node->arr_size = array_pop(parser->nodes); - parse_consume(parser, TOK_RSQUARE, - cstr("unmatched brackets ']' in array type")); - if (parse_match(parser, TOK_DOT)) { - // Symbol chain. - parse_consume(parser, TOK_SYMBOL, - cstr("expected symbol after '.' operator")); - parse_symbol(parser); - node->next = array_pop(parser->nodes); - } - } else { - node = node_alloc(parser, NODE_SYMBOL, prev); - if (!node) return; - } - node->value.sym = prev.val; - array_push(parser->nodes, node, parser->storage); -} - -void -parse_grouping(Parser *parser) { - if (parser->panic) return; -#if DEBUG == 1 - print("parsing group "); - print_token(parser->previous); -#endif - parse_expr(parser, PREC_LOW); - parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); -} - -void -graph_node(Node *node) { - if (node == NULL) return; - print("%d [width=2.5,shape=Mrecord,label=\"", node->id); - print("{ %s | { L: %d | C: %d } } ", node_str[node->kind], - node->line, node->col); - switch (node->kind) { - case NODE_NUM_INT: print("| Value: %d", node->value.i); break; - case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; - case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; - case NODE_STRING: print("| Value: %s", node->value.str); break; - case NODE_SYMBOL_IDX: - case NODE_STRUCT: - case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; - case NODE_SYMBOL: - case NODE_TYPE: { - if (node->is_ptr) { - print("| Name: @%s", node->value.sym); - } else { - print("| Name: %s", node->value.sym); - } - } break; - default: break; - } - println("\"];"); - - switch (node->kind) { - case NODE_COND: - case NODE_MATCH: { - if (node->match_expr) { - println("%d:e->%d:w;", node->id, node->match_expr->id); - graph_node(node->match_expr); - } - for (sz i = 0; i < array_size(node->match_cases); i++) { - Node *next = node->match_cases[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_BLOCK: - case NODE_STRUCT_LIT: - case NODE_COMPOUND_TYPE: { - for (sz i = 0; i < array_size(node->elements); i++) { - Node *next = node->elements[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_ENUM: - case NODE_STRUCT: { - for (sz i = 0; i < array_size(node->struct_field); i++) { - Node *next = node->struct_field[i]; - println("%d:e->%d:w;", node->id, next->id); - graph_node(next); - } - } break; - case NODE_IF: { - if (node->cond_if) { - println("%d:e->%d:w;", node->id, node->cond_if->id); - graph_node(node->cond_if); - } - if (node->cond_expr) { - println("%d:e->%d:w;", node->id, node->cond_expr->id); - graph_node(node->cond_expr); - } - if (node->cond_else) { - println("%d:e->%d:w;", node->id, node->cond_else->id); - graph_node(node->cond_else); - } - } break; - case NODE_VAL_FIELD: - case NODE_SET: - case NODE_LET: { - if (node->var_name) { - println("%d:e->%d:w;", node->id, node->var_name->id); - graph_node(node->var_name); - } - if (node->var_type) { - println("%d:e->%d:w;", node->id, node->var_type->id); - graph_node(node->var_type); - } - if (node->var_val) { - println("%d:e->%d:w;", node->id, node->var_val->id); - graph_node(node->var_val); - } - } break; - default: { - if (node->left) { - println("%d:e->%d:w;", node->id, node->left->id); - graph_node(node->left); - } - if (node->right) { - println("%d:e->%d:w;", node->id, node->right->id); - graph_node(node->right); - } - } break; - } -} - -void -graph_ast(Node **roots) { - if (roots == NULL) return; - println("digraph ast {"); - println("rankdir=LR;"); - println("ranksep=\"0.95 equally\";"); - println("nodesep=\"0.5 equally\";"); - println("overlap=scale;"); - println("bgcolor=\"transparent\";"); - for (sz i = 0; i < array_size(roots); ++i) { - println("subgraph %d {", i); - Node *root = roots[array_size(roots) - 1 - i]; - graph_node(root); - println("}"); - } - println("}"); -} - void process_file(Str path) { Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); -- cgit v1.2.1