From 27e6bf016e87702d30ebb7c0716ff3e4f08a8823 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Wed, 19 Jun 2024 08:51:35 +0200 Subject: Move parser to its own file --- src/parser.c | 1340 +++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 911 insertions(+), 429 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 3f15b47..84f3424 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,488 +1,970 @@ -#include "parser.h" -#include "darray.h" +#ifndef PARSER_C +#define PARSER_C + +#include "badlib.h" +#include "lexer.c" + +// +// Nodes +// + +typedef enum NodeKind { + // Arithmetic. + NODE_ADD, + NODE_SUB, + NODE_DIV, + NODE_MUL, + NODE_MOD, + // Logical. + NODE_NOT, + NODE_AND, + NODE_OR, + NODE_EQ, + NODE_NEQ, + NODE_LT, + NODE_GT, + NODE_LE, + NODE_GE, + // Bitwise ops. + NODE_BITNOT, + NODE_BITAND, + NODE_BITOR, + NODE_BITLSHIFT, + NODE_BITRSHIFT, + // Literals. + NODE_STRING, + NODE_SYMBOL, + NODE_NUM_INT, + NODE_NUM_UINT, + NODE_NUM_FLOAT, + NODE_TRUE, + NODE_FALSE, + NODE_NIL, + NODE_STRUCT_LIT, + // Keywords. + NODE_LET, + NODE_SET, + NODE_STRUCT, + NODE_IF, + NODE_MATCH, + NODE_CASE, + NODE_COND, + NODE_ENUM, + NODE_BREAK, + NODE_CONTINUE, + NODE_WHILE, + // Helpers. + NODE_SYMBOL_IDX, + NODE_TYPE, + NODE_ARR_TYPE, + NODE_COMPOUND_TYPE, + NODE_VAL_FIELD, + NODE_BLOCK, +} NodeKind; + +Str node_str[] = { + // Arithmetic. + [NODE_ADD] = cstr("ADD"), + [NODE_SUB] = cstr("SUB"), + [NODE_DIV] = cstr("DIV"), + [NODE_MUL] = cstr("MUL"), + [NODE_MOD] = cstr("MOD"), + // Logical. + [NODE_NOT] = cstr("NOT"), + [NODE_AND] = cstr("AND"), + [NODE_OR] = cstr("OR"), + [NODE_EQ] = cstr("EQ"), + [NODE_NEQ] = cstr("NEQ"), + [NODE_LT] = cstr("LT"), + [NODE_GT] = cstr("GT"), + [NODE_LE] = cstr("LE"), + [NODE_GE] = cstr("GE"), + // Bitwise ops. + [NODE_BITNOT] = cstr("BITNOT"), + [NODE_BITAND] = cstr("BITAND"), + [NODE_BITOR] = cstr("BITOR"), + [NODE_BITLSHIFT] = cstr("BITLSHIFT"), + [NODE_BITRSHIFT] = cstr("BITRSHIFT"), + // Literals. + [NODE_STRING] = cstr("STRING"), + [NODE_SYMBOL] = cstr("SYMBOL"), + [NODE_NUM_INT] = cstr("INT"), + [NODE_NUM_UINT] = cstr("UINT"), + [NODE_NUM_FLOAT] = cstr("FLOAT"), + [NODE_TRUE] = cstr("TRUE"), + [NODE_FALSE] = cstr("FALSE"), + [NODE_NIL] = cstr("NIL"), + [NODE_STRUCT_LIT] = cstr("STRUCT LIT"), + // Keywords. + [NODE_LET] = cstr("LET"), + [NODE_SET] = cstr("SET"), + [NODE_STRUCT] = cstr("STRUCT DEF"), + [NODE_IF] = cstr("IF"), + [NODE_MATCH] = cstr("MATCH"), + [NODE_CASE] = cstr("CASE"), + [NODE_COND] = cstr("COND"), + [NODE_ENUM] = cstr("ENUM"), + [NODE_BREAK] = cstr("BREAK"), + [NODE_CONTINUE] = cstr("CONTINUE"), + [NODE_WHILE] = cstr("WHILE"), + // Helpers. + [NODE_TYPE] = cstr("TYPE"), + [NODE_ARR_TYPE] = cstr("TYPE (ARR)"), + [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"), + [NODE_COMPOUND_TYPE] = cstr("TYPE (COMPOUND)"), + [NODE_VAL_FIELD] = cstr("FIELD"), + [NODE_BLOCK] = cstr("BLOCK"), +}; + +typedef struct Node { + sz id; + sz line; + sz col; + + NodeKind kind; + union { + f64 d; + sz i; + u64 u; + Str str; + Str sym; + } value; + union { + struct { + struct Node *left; + struct Node *right; + }; + struct { + struct Node *next; + struct Node *arr_size; + }; + struct { + struct Node *var_name; + struct Node *var_type; + struct Node *var_val; + }; + struct { + struct Node *while_cond; + struct Node *while_expr; + }; + struct { + struct Node *cond_if; + struct Node *cond_expr; + struct Node *cond_else; + }; + struct { + struct Node *field_name; + struct Node *field_type; + struct Node *field_val; + }; + struct { + struct Node *match_expr; + struct Node **match_cases; + }; + struct { + struct Node *case_value; + struct Node *case_expr; + }; + struct Node **struct_field; + struct Node **elements; + struct Node **statements; + }; + bool is_ptr; +} Node; + +// +// Parsing. +// + +typedef struct Parser { + Token current; + Token previous; + Token *tokens; + sz idx; + + // Error handling. + bool err; + bool panic; + Str file_name; + + // Storage. + Node **nodes; + Arena *storage; +} Parser; + +typedef enum { + PREC_NONE = 0, + PREC_LOW, // lowest precedence + PREC_BITLOGIC, // & | + PREC_BITSHIFT, // << >> + PREC_OR, // || + PREC_AND, // && + PREC_EQUALITY, // == != + PREC_COMPARISON, // < > <= >= + PREC_TERM, // + - + PREC_FACTOR, // * / % + PREC_UNARY, // ! - + PREC_CALL, // . () + PREC_PRIMARY // highest precedence +} ParsePrecedence; + +typedef void (*ParseFn)(Parser *); + +typedef struct { + ParseFn prefix; + ParseFn infix; + ParsePrecedence precedence; +} ParseRule; + +void parse_expr(Parser *parser, ParsePrecedence precedence); +void parse_advance(Parser *parser); +bool parse_match(Parser *parser, TokenKind kind); + +void parse_grouping(Parser *parser); +void parse_unary(Parser *parser); +void parse_binary(Parser *parser); +void parse_number(Parser *parser); +void parse_literal(Parser *parser); +void parse_string(Parser *parser); +void parse_symbol(Parser *parser); +void parse_keyword(Parser *parser); +void parse_type(Parser *parser); +void parse_block(Parser *parser); + +ParseRule parse_rules[] = { + [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, + [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, + [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, + + // Arithmetic. + [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, + [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, + [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, + [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, + [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, + + // Logical. + [TOK_NOT] = {parse_unary, NULL, PREC_NONE}, + [TOK_AND] = {NULL, parse_binary, PREC_AND}, + [TOK_OR] = {NULL, parse_binary, PREC_OR}, + [TOK_EQ] = {NULL, parse_binary, PREC_EQUALITY}, + [TOK_NEQ] = {NULL, parse_binary, PREC_EQUALITY}, + [TOK_LT] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_GT] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_LE] = {NULL, parse_binary, PREC_COMPARISON}, + [TOK_GE] = {NULL, parse_binary, PREC_COMPARISON}, + + // Bitwise ops. + [TOK_BITNOT] = {parse_unary, NULL, PREC_NONE}, + [TOK_BITAND] = {NULL, parse_binary, PREC_BITLOGIC}, + [TOK_BITOR] = {NULL, parse_binary, PREC_BITLOGIC}, + [TOK_BITLSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, + [TOK_BITRSHIFT] = {NULL, parse_binary, PREC_BITSHIFT}, + + // Literals. + [TOK_STRING] = {parse_string, NULL, PREC_NONE}, + [TOK_SYMBOL] = {parse_symbol, NULL, PREC_NONE}, + [TOK_CHAR] = {parse_number, NULL, PREC_NONE}, + [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, + [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, + [TOK_TRUE] = {parse_literal, NULL, PREC_NONE}, + [TOK_FALSE] = {parse_literal, NULL, PREC_NONE}, + [TOK_NIL] = {parse_literal, NULL, PREC_NONE}, + + // Statements. + [TOK_LET] = {parse_keyword, NULL, PREC_NONE}, + [TOK_SET] = {parse_keyword, NULL, PREC_NONE}, + [TOK_STRUCT] = {parse_keyword, NULL, PREC_NONE}, + [TOK_ENUM] = {parse_keyword, NULL, PREC_NONE}, + [TOK_IF] = {parse_keyword, NULL, PREC_NONE}, + [TOK_MATCH] = {parse_keyword, NULL, PREC_NONE}, + [TOK_COND] = {parse_keyword, NULL, PREC_NONE}, + [TOK_WHILE] = {parse_keyword, NULL, PREC_NONE}, + [TOK_CONTINUE] = {parse_keyword, NULL, PREC_NONE}, + [TOK_BREAK] = {parse_keyword, NULL, PREC_NONE}, + [TOK_FUN] = {parse_keyword, NULL, PREC_NONE}, + [TOK_RETURN] = {parse_keyword, NULL, PREC_NONE}, + + [TOK_EOF] = {NULL, NULL, PREC_NONE}, +}; -Token -next_token(Parser *parser) { - return parser->tokens[parser->current_token++]; +Node * +node_alloc(Parser *parser, NodeKind kind, Token tok) { + if (parser->panic) return NULL; + static sz id = 0; + Node *node = arena_calloc((sz)sizeof(Node), parser->storage); + node->id = id++; + node->kind = kind; + node->line = tok.line; + node->col = tok.col; + return node; } -Token -peek_token(Parser *parser) { - return parser->tokens[parser->current_token]; -} +void +parse_emit_err(Parser *parser, Token token, Str msg) { + if (parser->panic) return; + parser->panic = true; + parser->err = true; + eprint("%s:%d:%d: error: %s", parser->file_name, token.line, token.col, + msg); -bool -has_next(Parser *parser) { - return parser->current_token < array_size(parser->tokens); + if (token.kind == TOK_EOF) { + eprintln(" at end of the file"); + } else if (token.kind != TOK_UNKNOWN) { + eprintln(" at %s", token.val); + } } -bool -consume_rparen(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_RPAREN) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); - return false; +void +parse_advance(Parser *parser) { + assert(parser); + parser->previous = parser->current; + if (parser->previous.kind == TOK_EOF) { + return; } - return true; + parser->current = parser->tokens[parser->idx++]; } bool -consume_lparen(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_LPAREN) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col); +parse_match(Parser *parser, TokenKind kind) { + assert(parser); + if (parser->current.kind != kind) { return false; } + parse_advance(parser); return true; } -Node * -parse_number(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_NUMBER) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); - return NULL; - } - - bool negative = false; - int base = 10; - char c = sv_next(&tok.value); - if (c == '-') { - negative = true; - c = sv_next(&tok.value); - } - if (c == '+') { - c = sv_next(&tok.value); - } - if (c == '0' && sv_peek(&tok.value) != '\0') { - c = sv_next(&tok.value); - if (c == 'x') { - base = 16; - c = sv_next(&tok.value); - } else if (c == 'b') { - base = 2; - c = sv_next(&tok.value); - } else if (!(c >= '0' && c <= '9')){ - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - } - - // Integral part. - u64 integral = 0; - while (c != '\0') { - ssize_t current = 0; - if (c >= 'a' && c <= 'z' && base == 16) { - current = (c - 'a') + 10; - } else if (c >= 'A' && c <= 'Z' && base == 16) { - current = (c - 'A') + 10; - } else if (c >= '0' && c <= '9') { - current = (c - '0'); - } else if (c == '.') { - c = sv_next(&tok.value); - break; - } else { - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - integral = integral * base + current; - c = sv_next(&tok.value); - } - - // Fractional part. - u64 fractional = 0; - while (c != '\0') { - ssize_t current = 0; - if (c >= 'a' && c <= 'z' && base == 16) { - current = (c - 'a') + 10; - } else if (c >= 'A' && c <= 'Z' && base == 16) { - current = (c - 'A') + 10; - } else if (c >= '0' && c <= '9') { - current = (c - '0'); - } else { - push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); - return NULL; - } - fractional = fractional * base + current; - c = sv_next(&tok.value); +static void +parse_consume(Parser *parser, TokenKind kind, Str msg) { + if (parser->current.kind == kind) { + parse_advance(parser); + return; } - - Node * node = alloc_node(NODE_NUMBER); - node->number.negative = negative; - node->number.integral = integral; - node->number.fractional = fractional; - node->line = tok.line; - node->col = tok.col; - return node; + parse_emit_err(parser, parser->current, msg); } -Node * -parse_string(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_STRING) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_STRING); - node->string = tok.value; - node->line = tok.line; - node->col = tok.col; - return node; +void +parse_block(Parser *parser) { +#if DEBUG == 1 + print("parsing block "); + print_token(parser->previous); +#endif + Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); + if (!block) return; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + parse_expr(parser, PREC_LOW); + Node *next = array_pop(parser->nodes); + array_push(block->statements, next, parser->storage); + } + array_push(parser->nodes, block, parser->storage); } -Node * -parse_symbol(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_SYMBOL) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); - return NULL; +void +parse_expr(Parser *parser, ParsePrecedence precedence) { + parse_advance(parser); + // TODO: synchronize panic mode on keywords. + if (parser->panic) return; + ParseFn prefix = parse_rules[parser->previous.kind].prefix; + if (prefix == NULL) { + parse_emit_err(parser, parser->previous, cstr("expected expression")); + return; + } + prefix(parser); + + while (precedence <= parse_rules[parser->current.kind].precedence) { + parse_advance(parser); + ParseFn infix = parse_rules[parser->previous.kind].infix; + if (infix == NULL) { + parse_emit_err(parser, parser->previous, + cstr("expected expression")); + return; + } + infix(parser); } - Node *node = alloc_node(NODE_SYMBOL); - node->string = tok.value; - node->line = tok.line; - node->col = tok.col; - return node; } -Node * -parse_type(Parser *parser) { - Token tok = next_token(parser); - if (tok.type != TOKEN_COLON) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_TYPE); - node->line = tok.line; - node->col = tok.col; - tok = next_token(parser); - if (tok.type != TOKEN_SYMBOL) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); - return NULL; - } - node->string = tok.value; - return node; +void +parse_unary(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing unary "); + print_token(prev); +#endif + parse_expr(parser, PREC_UNARY); + Node *node = NULL; + switch (prev.kind) { + case TOK_NOT: node = node_alloc(parser, NODE_NOT, prev); break; + case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; + default: break; // Unreachable. + } + if (!node) return; + node->left = array_pop(parser->nodes); + array_push(parser->nodes, node, parser->storage); } -Node * -parse_bool(Parser *parser) { - Token tok = next_token(parser); - if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { - push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); - return NULL; - } - Node *node = alloc_node(NODE_BOOL); - node->boolean = tok.type == TOKEN_TRUE; - node->line = tok.line; - node->col = tok.col; - return node; +void +parse_literal(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing literal "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_TRUE: node = node_alloc(parser, NODE_TRUE, prev); break; + case TOK_FALSE: node = node_alloc(parser, NODE_FALSE, prev); break; + case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; + default: return; // Unreachable. + } + if (!node) return; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_builtin(Parser *parser) { - Token tok = next_token(parser); - Node *node = alloc_node(NODE_BUILTIN); - node->builtin.type = tok.type; - node->line = tok.line; - node->col = tok.col; - array_init(node->builtin.args, 0); - while (has_next(parser)) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - return node; +void +parse_type(Parser *parser) { + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing type "); + print_token(prev); +#endif + Node *node = node_alloc(parser, NODE_TYPE, prev); + if (!node) return; + if (parse_match(parser, TOK_AT)) { + node->is_ptr = true; + } + if (parse_match(parser, TOK_LCURLY)) { + node->kind = NODE_COMPOUND_TYPE; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + parse_consume(parser, TOK_SYMBOL, cstr("expected name")); + parse_symbol(parser); + Node *next = array_pop(parser->nodes); + parse_consume(parser, TOK_COLON, cstr("incomplete type")); + parse_type(parser); + next->next = array_pop(parser->nodes); + array_push(node->elements, next, parser->storage); } - Node *arg = parse_next(parser); - if (arg == NULL) { - break; + } else { + parse_consume(parser, TOK_SYMBOL, + cstr("no type given for struct field")); + node->value.sym = parser->previous.val; + // Optional array value? + if (parse_match(parser, TOK_LSQUARE)) { + node->kind = NODE_ARR_TYPE, + parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); + parse_number(parser); + node->arr_size = array_pop(parser->nodes); + parse_consume(parser, TOK_RSQUARE, + cstr("unmatched brackets ']' in array type")); } - array_push(node->builtin.args, arg); } - push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); - return NULL; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_funcall(Parser *parser) { - Token tok = peek_token(parser); - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; - } - Node *node = alloc_node(NODE_FUNCALL); - node->funcall.name = name; - node->line = tok.line; - node->col = tok.col; - array_init(node->funcall.args, 0); - while (has_next(parser)) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - return node; - } - Node *arg = parse_next(parser); - if (arg == NULL) { - break; - } - array_push(node->funcall.args, arg); +void +parse_keyword(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing keyword "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_LET: { + node = node_alloc(parser, NODE_LET, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on let expression")); + parse_symbol(parser); + node->var_name = array_pop(parser->nodes); + if (node->var_name->next) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in let expression")); + return; + } + + // Optional type declaration. + if (parse_match(parser, TOK_COLON)) { + parse_type(parser); + node->var_type = array_pop(parser->nodes); + } + + // Optional assignment. + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + node->var_val = array_pop(parser->nodes); + } + } break; + case TOK_SET: { + node = node_alloc(parser, NODE_SET, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on let expression")); + parse_symbol(parser); + node->var_name = array_pop(parser->nodes); + parse_consume(parser, TOK_ASSIGN, + cstr("expected assignment on set expression")); + parse_expr(parser, PREC_LOW); + node->var_val = array_pop(parser->nodes); + } break; + case TOK_STRUCT: { + node = node_alloc(parser, NODE_STRUCT, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on struct definition")); + // Just consume this to avoid conflicts with struct literals. + node->value.sym = parser->previous.val; + + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on struct definition")); + + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = + node_alloc(parser, NODE_VAL_FIELD, parser->current); + if (!field) return; + parse_consume( + parser, TOK_SYMBOL, + cstr("expected symbol name on struct definition")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + if (field->field_name->next || + field->field_name->kind == NODE_SYMBOL_IDX) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in struct field")); + return; + } + parse_consume( + parser, TOK_COLON, + cstr("expected symbol name on struct definition")); + parse_type(parser); + field->field_type = array_pop(parser->nodes); + + // Optional assignment. + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + field->field_val = array_pop(parser->nodes); + } + + array_push(node->struct_field, field, parser->storage); + } + } break; + case TOK_IF: { + node = node_alloc(parser, NODE_IF, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on if expression")); + parse_expr(parser, PREC_LOW); + node->cond_if = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on if expression")); + parse_expr(parser, PREC_LOW); + node->cond_expr = array_pop(parser->nodes); + if (parse_match(parser, TOK_ELSE)) { + parse_expr(parser, PREC_LOW); + node->cond_else = array_pop(parser->nodes); + } + } break; + case TOK_MATCH: { + node = node_alloc(parser, NODE_MATCH, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on if expression")); + parse_expr(parser, PREC_LOW); + node->match_expr = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on if expression")); + parse_consume(parser, TOK_LCURLY, + cstr("expected block of match cases")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); + if (!tmp) return; + // Are we on the default case. + if (!parse_match(parser, TOK_ELSE)) { + parse_consume(parser, TOK_CASE, + cstr("expected case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_value = array_pop(parser->nodes); + } + parse_consume(parser, TOK_ASSIGN, + cstr("malformed case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_expr = array_pop(parser->nodes); + array_push(node->match_cases, tmp, parser->storage); + } + // TODO: Check that we only have literals on the match case, this + // could be done on the analysis step, but also here... + // TODO: Check that there are no multiple default or duplicated + // cases. + } break; + case TOK_ENUM: { + node = node_alloc(parser, NODE_ENUM, prev); + if (!node) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on enum definition")); + // Just consume this to avoid conflicts with struct literals. + node->value.sym = parser->previous.val; + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on enum definition")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = + node_alloc(parser, NODE_VAL_FIELD, parser->current); + if (!field) return; + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol name on enum definition")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + if (field->field_name->next || + field->field_name->kind == NODE_SYMBOL_IDX) { + parse_emit_err(parser, prev, + cstr("invalid symbol name in enum field")); + return; + } + if (parse_match(parser, TOK_ASSIGN)) { + parse_expr(parser, PREC_LOW); + field->field_val = array_pop(parser->nodes); + } + array_push(node->struct_field, field, parser->storage); + } + } break; + case TOK_COND: { + node = node_alloc(parser, NODE_COND, prev); + if (!node) return; + parse_consume(parser, TOK_LCURLY, + cstr("expected '{' on cond expression")); + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *tmp = node_alloc(parser, NODE_CASE, parser->previous); + if (!tmp) return; + // Are we on the default case. + if (!parse_match(parser, TOK_ELSE)) { + parse_consume(parser, TOK_CASE, + cstr("expected case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_value = array_pop(parser->nodes); + } + parse_consume(parser, TOK_ASSIGN, + cstr("malformed case statement")); + parse_expr(parser, PREC_LOW); + tmp->case_expr = array_pop(parser->nodes); + array_push(node->match_cases, tmp, parser->storage); + } + } break; + case TOK_BREAK: { + node = node_alloc(parser, NODE_BREAK, prev); + if (!node) return; + } break; + case TOK_CONTINUE: { + node = node_alloc(parser, NODE_CONTINUE, prev); + if (!node) return; + } break; + case TOK_WHILE: { + node = node_alloc(parser, NODE_WHILE, prev); + if (!node) return; + parse_consume(parser, TOK_LPAREN, + cstr("expected '(' on while expression")); + parse_expr(parser, PREC_LOW); + node->while_cond = array_pop(parser->nodes); + parse_consume(parser, TOK_RPAREN, + cstr("expected ')' on while expression")); + parse_expr(parser, PREC_LOW); + node->while_expr = array_pop(parser->nodes); + } break; + default: return; // Unreachable. } - push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); - return NULL; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_def(Parser *parser) { - Token tok = next_token(parser); - - Node *symbol = parse_symbol(parser); - if (symbol == NULL) { - return NULL; - } - - // TODO: Making type definitions mandatory for now until we introduce type - // inference. - Node *type = parse_type(parser); - if (type == NULL) { - return NULL; - } - - Node *value = parse_next(parser); - if (value == NULL) { - return NULL; - } - - if (!consume_rparen(parser)) { - return NULL; +void +parse_binary(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing binary "); + print_token(prev); +#endif + ParseRule rule = parse_rules[prev.kind]; + parse_expr(parser, rule.precedence + 1); + + Node *node; + switch (prev.kind) { + case TOK_ADD: node = node_alloc(parser, NODE_ADD, prev); break; + case TOK_SUB: node = node_alloc(parser, NODE_SUB, prev); break; + case TOK_MUL: node = node_alloc(parser, NODE_MUL, prev); break; + case TOK_DIV: node = node_alloc(parser, NODE_DIV, prev); break; + case TOK_MOD: node = node_alloc(parser, NODE_MOD, prev); break; + case TOK_AND: node = node_alloc(parser, NODE_AND, prev); break; + case TOK_OR: node = node_alloc(parser, NODE_OR, prev); break; + case TOK_EQ: node = node_alloc(parser, NODE_EQ, prev); break; + case TOK_NEQ: node = node_alloc(parser, NODE_NEQ, prev); break; + case TOK_LT: node = node_alloc(parser, NODE_LT, prev); break; + case TOK_GT: node = node_alloc(parser, NODE_GT, prev); break; + case TOK_LE: node = node_alloc(parser, NODE_LE, prev); break; + case TOK_GE: node = node_alloc(parser, NODE_GE, prev); break; + case TOK_BITAND: { + node = node_alloc(parser, NODE_BITAND, prev); + } break; + case TOK_BITOR: { + node = node_alloc(parser, NODE_BITOR, prev); + } break; + case TOK_BITLSHIFT: { + node = node_alloc(parser, NODE_BITLSHIFT, prev); + } break; + case TOK_BITRSHIFT: { + node = node_alloc(parser, NODE_BITRSHIFT, prev); + } break; + default: { + parse_emit_err(parser, prev, cstr("unreachable")); + return; + } } - - Node *node = alloc_node(NODE_DEF); - node->def.symbol = symbol; - node->def.value = value; - node->def.type = type; - node->line = tok.line; - node->col = tok.col; - return node; + if (!node) return; + node->right = array_pop(parser->nodes); + node->left = array_pop(parser->nodes); + array_push(parser->nodes, node, parser->storage); } -Node * -parse_set(Parser *parser) { - Token tok = next_token(parser); - - Node *symbol = parse_symbol(parser); - if (symbol == NULL) { - return NULL; - } - - Node *value = parse_next(parser); - if (value == NULL) { - return NULL; - } - - if (!consume_rparen(parser)) { - return NULL; +void +parse_number(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing number "); + print_token(prev); +#endif + Node *node = NULL; + switch (prev.kind) { + case TOK_NUM_INT: { + if (str_has_prefix(prev.val, cstr("0x")) || + str_has_prefix(prev.val, cstr("0b"))) { + node = node_alloc(parser, NODE_NUM_UINT, prev); + if (!node) return; + node->value.u = str_to_uint(prev.val); + } else { + node = node_alloc(parser, NODE_NUM_INT, prev); + if (!node) return; + node->value.i = str_to_int(prev.val); + } + } break; + case TOK_NUM_FLOAT: { + node = node_alloc(parser, NODE_NUM_FLOAT, prev); + if (!node) return; + node->value.d = str_to_float(prev.val); + } break; + case TOK_CHAR: { + node = node_alloc(parser, NODE_NUM_INT, prev); + if (!node) return; + node->value.i = prev.val.mem[1]; + } break; + default: break; } - - Node *node = alloc_node(NODE_SET); - node->set.symbol = symbol; - node->set.value = value; - node->line = tok.line; - node->col = tok.col; - return node; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_fun(Parser *parser) { - Token tok = next_token(parser); - - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; - } - - Node *node = alloc_node(NODE_FUN); - node->fun.name = name; - array_init(node->fun.param_names, 0); - array_init(node->fun.param_types, 0); - node->line = tok.line; - node->col = tok.col; +void +parse_string(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing string "); + print_token(prev); +#endif + Node *node = node_alloc(parser, NODE_STRING, prev); + if (!node) return; + node->value.str = str_remove_prefix(prev.val, cstr("\"")); + node->value.str = str_remove_suffix(node->value.str, cstr("\"")); + array_push(parser->nodes, node, parser->storage); +} - // Parse parameter list and return type. - if (!consume_lparen(parser)) { - return NULL; - } - while (true) { - Token next = peek_token(parser); - if (next.type == TOKEN_RPAREN) { - next_token(parser); - break; +void +parse_symbol(Parser *parser) { + if (parser->panic) return; + Token prev = parser->previous; +#if DEBUG == 1 + print("parsing symbol "); + print_token(prev); +#endif + if (prev.kind == TOK_AT) { + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + Node *node = array_pop(parser->nodes); + if (node) { + node->is_ptr = true; + array_push(parser->nodes, node, parser->storage); } - Node *name = parse_symbol(parser); - if (name == NULL) { - return NULL; + return; + } + Node *node = node_alloc(parser, NODE_SYMBOL, prev); + if (!node) return; + if (parse_match(parser, TOK_DOT)) { + // Symbol chain. + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + node->next = array_pop(parser->nodes); + } else if (parse_match(parser, TOK_LCURLY)) { + // Struct literal. + node->kind = NODE_STRUCT_LIT; + while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { + Node *field = node_alloc(parser, NODE_VAL_FIELD, parser->current); + parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); + parse_symbol(parser); + field->field_name = array_pop(parser->nodes); + parse_consume(parser, TOK_ASSIGN, cstr("expected assignment")); + parse_expr(parser, PREC_LOW); + field->field_type = array_pop(parser->nodes); + array_push(node->elements, field, parser->storage); } - Node *type = parse_type(parser); - if (type == NULL) { - return NULL; + } else if (parse_match(parser, TOK_LSQUARE)) { + node->kind = NODE_SYMBOL_IDX; + parse_consume(parser, TOK_NUM_INT, cstr("no array size given")); + parse_number(parser); + node->arr_size = array_pop(parser->nodes); + parse_consume(parser, TOK_RSQUARE, + cstr("unmatched brackets ']' in array type")); + if (parse_match(parser, TOK_DOT)) { + // Symbol chain. + parse_consume(parser, TOK_SYMBOL, + cstr("expected symbol after '.' operator")); + parse_symbol(parser); + node->next = array_pop(parser->nodes); } - array_push(node->fun.param_names, name); - array_push(node->fun.param_types, type); - } - Node *ret_type = parse_type(parser); - if (ret_type == NULL) { - return NULL; - } - node->fun.return_type = ret_type; - - Node *body = parse_next(parser); - if (body == NULL) { - return NULL; - } - node->fun.body = body; - if (!consume_rparen(parser)) { - return NULL; + } else { + node = node_alloc(parser, NODE_SYMBOL, prev); + if (!node) return; } - - return node; + node->value.sym = prev.val; + array_push(parser->nodes, node, parser->storage); } -Node * -parse_if(Parser *parser) { - Token tok = next_token(parser); - - Node *node = alloc_node(NODE_IF); - node->ifexpr.cond = NULL; - node->ifexpr.expr_true = NULL; - node->ifexpr.expr_false = NULL; - node->line = tok.line; - node->col = tok.col; - - Node *cond = parse_next(parser); - if (cond == NULL) { - return NULL; - } - Node *expr_true = parse_next(parser); - if (expr_true == NULL) { - return NULL; - } - node->ifexpr.cond = cond; - node->ifexpr.expr_true = expr_true; - tok = peek_token(parser); - - // Optional else statement. - if (tok.type != TOKEN_RPAREN) { - Node *expr_false = parse_next(parser); - if (expr_false == NULL) { - return NULL; - } - node->ifexpr.expr_false = expr_false; - } - - if (!consume_rparen(parser)) { - return NULL; - } - - return node; +void +parse_grouping(Parser *parser) { + if (parser->panic) return; +#if DEBUG == 1 + print("parsing group "); + print_token(parser->previous); +#endif + parse_expr(parser, PREC_LOW); + parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); } -Node * -parse_paren(Parser *parser) { - next_token(parser); // Skip paren. - - Token tok = peek_token(parser); - - switch (tok.type) { - // Builtin functions. - case TOKEN_ADD: - case TOKEN_SUB: - case TOKEN_MUL: - case TOKEN_DIV: - case TOKEN_MOD: - case TOKEN_NOT: - case TOKEN_AND: - case TOKEN_OR: - case TOKEN_EQ: - case TOKEN_LT: - case TOKEN_GT: - case TOKEN_LE: - case TOKEN_GE: { return parse_builtin(parser); } break; - // Special functions. - case TOKEN_DEF: { return parse_def(parser); } break; - case TOKEN_SET: { return parse_set(parser); } break; - case TOKEN_FUN: { return parse_fun(parser); } break; - case TOKEN_IF: { return parse_if(parser); } break; +void +graph_node(Node *node) { + if (node == NULL) return; + print("%d [width=2.5,shape=Mrecord,label=\"", node->id); + print("{ %s | { L: %d | C: %d } } ", node_str[node->kind], + node->line, node->col); + switch (node->kind) { + case NODE_NUM_INT: print("| Value: %d", node->value.i); break; + case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; + case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; + case NODE_STRING: print("| Value: %s", node->value.str); break; + case NODE_SYMBOL_IDX: + case NODE_STRUCT: + case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; + case NODE_SYMBOL: + case NODE_TYPE: { + if (node->is_ptr) { + print("| Name: @%s", node->value.sym); + } else { + print("| Name: %s", node->value.sym); + } + } break; default: break; } - - return parse_funcall(parser); -} - -Node * -parse_block(Parser *parser) { - Token tok = next_token(parser); - - Node *node = alloc_node(NODE_BLOCK); - array_init(node->block.expr, 0); - node->line = tok.line; - node->col = tok.col; - while (true) { - Token next = peek_token(parser); - if (next.type == TOKEN_RCURLY) { - next_token(parser); - break; - } - Node *expr = parse_next(parser); - if (expr == NULL) { - return NULL; - } - array_push(node->block.expr, expr); - } - return node; -} - -Node * -parse_next(Parser *parser) { - Token tok = peek_token(parser); - switch (tok.type) { - case TOKEN_NUMBER: { return parse_number(parser); } break; - case TOKEN_STRING: { return parse_string(parser); } break; - case TOKEN_SYMBOL: { return parse_symbol(parser); } break; - case TOKEN_TRUE: - case TOKEN_FALSE: { return parse_bool(parser); } break; - case TOKEN_LPAREN: { return parse_paren(parser); } break; - case TOKEN_EOF: { return NULL; } break; - case TOKEN_LCURLY: { return parse_block(parser); } break; + println("\"];"); + + switch (node->kind) { + case NODE_COND: + case NODE_MATCH: { + if (node->match_expr) { + println("%d:e->%d:w;", node->id, node->match_expr->id); + graph_node(node->match_expr); + } + for (sz i = 0; i < array_size(node->match_cases); i++) { + Node *next = node->match_cases[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_BLOCK: + case NODE_STRUCT_LIT: + case NODE_COMPOUND_TYPE: { + for (sz i = 0; i < array_size(node->elements); i++) { + Node *next = node->elements[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_ENUM: + case NODE_STRUCT: { + for (sz i = 0; i < array_size(node->struct_field); i++) { + Node *next = node->struct_field[i]; + println("%d:e->%d:w;", node->id, next->id); + graph_node(next); + } + } break; + case NODE_IF: { + if (node->cond_if) { + println("%d:e->%d:w;", node->id, node->cond_if->id); + graph_node(node->cond_if); + } + if (node->cond_expr) { + println("%d:e->%d:w;", node->id, node->cond_expr->id); + graph_node(node->cond_expr); + } + if (node->cond_else) { + println("%d:e->%d:w;", node->id, node->cond_else->id); + graph_node(node->cond_else); + } + } break; + case NODE_VAL_FIELD: + case NODE_SET: + case NODE_LET: { + if (node->var_name) { + println("%d:e->%d:w;", node->id, node->var_name->id); + graph_node(node->var_name); + } + if (node->var_type) { + println("%d:e->%d:w;", node->id, node->var_type->id); + graph_node(node->var_type); + } + if (node->var_val) { + println("%d:e->%d:w;", node->id, node->var_val->id); + graph_node(node->var_val); + } + } break; default: { - push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); - return NULL; + if (node->left) { + println("%d:e->%d:w;", node->id, node->left->id); + graph_node(node->left); + } + if (node->right) { + println("%d:e->%d:w;", node->id, node->right->id); + graph_node(node->right); + } } break; } } -bool -parse_roots(Parser *parser) { - while (has_next(parser)) { - Token tok = peek_token(parser); - if (tok.type == TOKEN_EOF) { - break; - } - Node *node = parse_next(parser); - if (node == NULL) { - return false; - } - array_push(parser->roots, node); - } - return true; +void +graph_ast(Node **roots) { + if (roots == NULL) return; + println("digraph ast {"); + println("rankdir=LR;"); + println("ranksep=\"0.95 equally\";"); + println("nodesep=\"0.5 equally\";"); + println("overlap=scale;"); + println("bgcolor=\"transparent\";"); + for (sz i = 0; i < array_size(roots); ++i) { + println("subgraph %d {", i); + Node *root = roots[array_size(roots) - 1 - i]; + graph_node(root); + println("}"); + } + println("}"); } -Root * -parse(Token *tokens) { - Parser parser = { - .tokens = tokens, - .current_token = 0, - }; - array_init(parser.roots, 0); - - if (!parse_roots(&parser)) { - return NULL; - } - return parser.roots; -} +#endif // PARSER_C -- cgit v1.2.1