#include "parser.h" #include "darray.h" Token next_token(Parser *parser) { return parser->tokens[parser->current_token++]; } Token peek_token(Parser *parser) { return parser->tokens[parser->current_token]; } bool has_next(Parser *parser) { return parser->current_token < array_size(parser->tokens); } bool consume_rparen(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_RPAREN) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); return false; } return true; } bool consume_lparen(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_LPAREN) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col); return false; } return true; } Node * parse_number(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_NUMBER) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); return NULL; } bool negative = false; int base = 10; char c = sv_next(&tok.value); if (c == '-') { negative = true; c = sv_next(&tok.value); } if (c == '+') { c = sv_next(&tok.value); } if (c == '0' && sv_peek(&tok.value) != '\0') { c = sv_next(&tok.value); if (c == 'x') { base = 16; c = sv_next(&tok.value); } else if (c == 'b') { base = 2; c = sv_next(&tok.value); } else if (!(c >= '0' && c <= '9')){ push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); return NULL; } } // Integral part. u64 integral = 0; while (c != '\0') { ssize_t current = 0; if (c >= 'a' && c <= 'z' && base == 16) { current = (c - 'a') + 10; } else if (c >= 'A' && c <= 'Z' && base == 16) { current = (c - 'A') + 10; } else if (c >= '0' && c <= '9') { current = (c - '0'); } else if (c == '.') { c = sv_next(&tok.value); break; } else { push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); return NULL; } integral = integral * base + current; c = sv_next(&tok.value); } // Fractional part. u64 fractional = 0; while (c != '\0') { ssize_t current = 0; if (c >= 'a' && c <= 'z' && base == 16) { current = (c - 'a') + 10; } else if (c >= 'A' && c <= 'Z' && base == 16) { current = (c - 'A') + 10; } else if (c >= '0' && c <= '9') { current = (c - '0'); } else { push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); return NULL; } fractional = fractional * base + current; c = sv_next(&tok.value); } Node * node = alloc_node(NODE_NUMBER); node->number.negative = negative; node->number.integral = integral; node->number.fractional = fractional; node->line = tok.line; node->col = tok.col; return node; } Node * parse_string(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_STRING) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); return NULL; } Node *node = alloc_node(NODE_STRING); node->string = tok.value; node->line = tok.line; node->col = tok.col; return node; } Node * parse_symbol(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_SYMBOL) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); return NULL; } Node *node = alloc_node(NODE_SYMBOL); node->string = tok.value; node->line = tok.line; node->col = tok.col; return node; } Node * parse_type(Parser *parser) { Token tok = next_token(parser); if (tok.type != TOKEN_COLON) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); return NULL; } Node *node = alloc_node(NODE_TYPE); node->line = tok.line; node->col = tok.col; tok = next_token(parser); if (tok.type != TOKEN_SYMBOL) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); return NULL; } node->string = tok.value; return node; } Node * parse_bool(Parser *parser) { Token tok = next_token(parser); if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); return NULL; } Node *node = alloc_node(NODE_BOOL); node->boolean = tok.type == TOKEN_TRUE; node->line = tok.line; node->col = tok.col; return node; } Node * parse_builtin(Parser *parser) { Token tok = next_token(parser); Node *node = alloc_node(NODE_BUILTIN); node->builtin.type = tok.type; node->line = tok.line; node->col = tok.col; array_init(node->builtin.args, 0); while (has_next(parser)) { Token next = peek_token(parser); if (next.type == TOKEN_RPAREN) { next_token(parser); return node; } Node *arg = parse_next(parser); if (arg == NULL) { break; } array_push(node->builtin.args, arg); } push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); return NULL; } Node * parse_funcall(Parser *parser) { Token tok = peek_token(parser); Node *name = parse_symbol(parser); if (name == NULL) { return NULL; } Node *node = alloc_node(NODE_FUNCALL); node->funcall.name = name; node->line = tok.line; node->col = tok.col; array_init(node->funcall.args, 0); while (has_next(parser)) { Token next = peek_token(parser); if (next.type == TOKEN_RPAREN) { next_token(parser); return node; } Node *arg = parse_next(parser); if (arg == NULL) { break; } array_push(node->funcall.args, arg); } push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, tok.line, tok.col); return NULL; } Node * parse_def(Parser *parser) { Token tok = next_token(parser); Node *symbol = parse_symbol(parser); if (symbol == NULL) { return NULL; } // TODO: Making type definitions mandatory for now until we introduce type // inference. Node *type = parse_type(parser); if (type == NULL) { return NULL; } Node *value = parse_next(parser); if (value == NULL) { return NULL; } if (!consume_rparen(parser)) { return NULL; } Node *node = alloc_node(NODE_DEF); node->def.symbol = symbol; node->def.value = value; node->def.type = type; node->line = tok.line; node->col = tok.col; return node; } Node * parse_set(Parser *parser) { Token tok = next_token(parser); Node *symbol = parse_symbol(parser); if (symbol == NULL) { return NULL; } Node *value = parse_next(parser); if (value == NULL) { return NULL; } if (!consume_rparen(parser)) { return NULL; } Node *node = alloc_node(NODE_SET); node->set.symbol = symbol; node->set.value = value; node->line = tok.line; node->col = tok.col; return node; } Node * parse_fun(Parser *parser) { Token tok = next_token(parser); Node *name = parse_symbol(parser); if (name == NULL) { return NULL; } Node *node = alloc_node(NODE_FUN); node->fun.name = name; array_init(node->fun.param_names, 0); array_init(node->fun.param_types, 0); node->line = tok.line; node->col = tok.col; // Parse parameter list and return type. if (!consume_lparen(parser)) { return NULL; } while (true) { Token next = peek_token(parser); if (next.type == TOKEN_RPAREN) { next_token(parser); break; } Node *name = parse_symbol(parser); if (name == NULL) { return NULL; } Node *type = parse_type(parser); if (type == NULL) { return NULL; } array_push(node->fun.param_names, name); array_push(node->fun.param_types, type); } Node *ret_type = parse_type(parser); if (ret_type == NULL) { return NULL; } node->fun.return_type = ret_type; Node *body = parse_next(parser); if (body == NULL) { return NULL; } node->fun.body = body; if (!consume_rparen(parser)) { return NULL; } return node; } Node * parse_if(Parser *parser) { Token tok = next_token(parser); Node *node = alloc_node(NODE_IF); node->ifexpr.cond = NULL; node->ifexpr.expr_true = NULL; node->ifexpr.expr_false = NULL; node->line = tok.line; node->col = tok.col; Node *cond = parse_next(parser); if (cond == NULL) { return NULL; } Node *expr_true = parse_next(parser); if (expr_true == NULL) { return NULL; } node->ifexpr.cond = cond; node->ifexpr.expr_true = expr_true; tok = peek_token(parser); // Optional else statement. if (tok.type != TOKEN_RPAREN) { Node *expr_false = parse_next(parser); if (expr_false == NULL) { return NULL; } node->ifexpr.expr_false = expr_false; } if (!consume_rparen(parser)) { return NULL; } return node; } Node * parse_paren(Parser *parser) { next_token(parser); // Skip paren. Token tok = peek_token(parser); switch (tok.type) { // Builtin functions. case TOKEN_ADD: case TOKEN_SUB: case TOKEN_MUL: case TOKEN_DIV: case TOKEN_MOD: case TOKEN_NOT: case TOKEN_AND: case TOKEN_OR: case TOKEN_EQ: case TOKEN_LT: case TOKEN_GT: case TOKEN_LE: case TOKEN_GE: { return parse_builtin(parser); } break; // Special functions. case TOKEN_DEF: { return parse_def(parser); } break; case TOKEN_SET: { return parse_set(parser); } break; case TOKEN_FUN: { return parse_fun(parser); } break; case TOKEN_IF: { return parse_if(parser); } break; default: break; } return parse_funcall(parser); } Node * parse_block(Parser *parser) { Token tok = next_token(parser); Node *node = alloc_node(NODE_BLOCK); array_init(node->block.expr, 0); node->line = tok.line; node->col = tok.col; while (true) { Token next = peek_token(parser); if (next.type == TOKEN_RCURLY) { next_token(parser); break; } Node *expr = parse_next(parser); if (expr == NULL) { return NULL; } array_push(node->block.expr, expr); } return node; } Node * parse_next(Parser *parser) { Token tok = peek_token(parser); switch (tok.type) { case TOKEN_NUMBER: { return parse_number(parser); } break; case TOKEN_STRING: { return parse_string(parser); } break; case TOKEN_SYMBOL: { return parse_symbol(parser); } break; case TOKEN_TRUE: case TOKEN_FALSE: { return parse_bool(parser); } break; case TOKEN_LPAREN: { return parse_paren(parser); } break; case TOKEN_EOF: { return NULL; } break; case TOKEN_LCURLY: { return parse_block(parser); } break; default: { push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); return NULL; } break; } } bool parse_roots(Parser *parser) { while (has_next(parser)) { Token tok = peek_token(parser); if (tok.type == TOKEN_EOF) { break; } Node *node = parse_next(parser); if (node == NULL) { return false; } array_push(parser->roots, node); } return true; } Root * parse(Token *tokens) { Parser parser = { .tokens = tokens, .current_token = 0, }; array_init(parser.roots, 0); if (!parse_roots(&parser)) { return NULL; } return parser.roots; }