#include "parser.h"
#include "darray.h"

// Return the token under the cursor and advance the cursor by one.
// No bounds check is performed on `parser->current`.
Token
next_token(Parser *parser) {
    return parser->tokens[parser->current++];
}

// Return the token under the cursor without consuming it.
// No bounds check is performed on `parser->current`.
Token
peek_token(Parser *parser) {
    return parser->tokens[parser->current];
}

// Report whether the cursor is still inside the token array.
bool
has_next(Parser *parser) {
    return parser->current < array_size(parser->tokens);
}

// Consume one token and require it to be a right parenthesis. On mismatch an
// ERR_NOT_A_RPAREN error is pushed at the token's position and false is
// returned; the token is consumed either way.
bool
consume_rparen(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_RPAREN) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col);
        return false;
    }
    return true;
}

// Consume one token and require it to be a left parenthesis. On mismatch an
// ERR_NOT_A_LPAREN error is pushed at the token's position and false is
// returned; the token is consumed either way.
bool
consume_lparen(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_LPAREN) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col);
        return false;
    }
    return true;
}

// Map the character `c` to its numeric value in `base`. Returns -1 when `c`
// is not a digit of that base (e.g. '2' in base 2, 'a' in base 10, 'g' in
// base 16).
static int
number_digit_value(char c, int base) {
    int value = -1;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        value = (c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
        value = (c - 'A') + 10;
    }
    return value < base ? value : -1;
}

// Parse a TOKEN_NUMBER into a NODE_NUMBER.
//
// Accepted shape: an optional single sign ('-' or '+'), an optional base
// prefix ("0x" for base 16, "0b" for base 2), the integral digits, and
// optionally a '.' followed by the fractional digits. Every digit must be
// valid for the selected base.
//
// Returns NULL after pushing ERR_NOT_A_NUMBER when the token is not a number,
// or ERR_MALFORMED_NUMBER when the token text does not match the shape above.
//
// NOTE(review): the end of the token text is detected by sv_next() yielding
// '\0' -- confirm against the string view implementation.
// NOTE(review): the fractional digits are accumulated as a plain integer, so
// leading zeros after the point are not preserved (".05" and ".5" both store
// 5), and neither accumulator is checked for u64 overflow.
Node *
parse_number(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_NUMBER) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col);
        return NULL;
    }

    bool negative = false;
    int base = 10;
    char c = sv_next(&tok.value);

    // At most one sign character is allowed.
    if (c == '-') {
        negative = true;
        c = sv_next(&tok.value);
    } else if (c == '+') {
        c = sv_next(&tok.value);
    }

    // A leading zero either introduces a base prefix or is simply the first
    // digit of the number; in the latter case it may be followed by more
    // digits, by the decimal point, or by the end of the literal ("0").
    if (c == '0') {
        c = sv_next(&tok.value);
        if (c == 'x') {
            base = 16;
            c = sv_next(&tok.value);
        } else if (c == 'b') {
            base = 2;
            c = sv_next(&tok.value);
        } else if (!((c >= '0' && c <= '9') || c == '.' || c == '\0')) {
            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
            return NULL;
        }
    }

    // Integral part: digits up to the end of the text or the decimal point.
    u64 integral = 0;
    while (c != '\0') {
        if (c == '.') {
            c = sv_next(&tok.value);
            break;
        }
        int digit = number_digit_value(c, base);
        if (digit < 0) {
            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
            return NULL;
        }
        integral = integral * (u64)base + (u64)digit;
        c = sv_next(&tok.value);
    }

    // Fractional part: everything left must be digits; a second '.' is
    // rejected as malformed.
    u64 fractional = 0;
    while (c != '\0') {
        int digit = number_digit_value(c, base);
        if (digit < 0) {
            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
            return NULL;
        }
        fractional = fractional * (u64)base + (u64)digit;
        c = sv_next(&tok.value);
    }

    Node *node = alloc_node(NODE_NUMBER);
    node->number.negative = negative;
    node->number.integral = integral;
    node->number.fractional = fractional;
    return node;
}

// Parse a TOKEN_STRING into a NODE_STRING whose `string` is the token text.
// Pushes ERR_NOT_A_STRING and returns NULL on any other token.
Node *
parse_string(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_STRING) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col);
        return NULL;
    }
    Node *node = alloc_node(NODE_STRING);
    node->string = tok.value;
    return node;
}

// Parse a TOKEN_SYMBOL into a NODE_SYMBOL whose `string` is the token text.
// Pushes ERR_NOT_A_SYMBOL and returns NULL on any other token.
Node *
parse_symbol(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_SYMBOL) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col);
        return NULL;
    }
    Node *node = alloc_node(NODE_SYMBOL);
    node->string = tok.value;
    return node;
}

// Parse a type annotation: a TOKEN_COLON followed by a TOKEN_SYMBOL. The
// resulting NODE_TYPE stores the symbol text in `string`. Pushes
// ERR_NOT_A_TYPE and returns NULL when either token is missing.
Node *
parse_type(Parser *parser) {
    Token tok = next_token(parser);
    if (tok.type != TOKEN_COLON) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col);
        return NULL;
    }
    tok = next_token(parser);
    if (tok.type != TOKEN_SYMBOL) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col);
        return NULL;
    }
    Node *node = alloc_node(NODE_TYPE);
    node->string = tok.value;
    return node;
}

// Parse TOKEN_TRUE / TOKEN_FALSE into a NODE_BOOL. Pushes ERR_NOT_A_BOOL and
// returns NULL on any other token.
Node *
parse_bool(Parser *parser) {
    Token tok = next_token(parser);
    if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) {
        push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col);
        return NULL;
    }
    Node *node = alloc_node(NODE_BOOL);
    node->boolean = tok.type == TOKEN_TRUE;
    return node;
}

// Parse a builtin call. The cursor is on the operator token (the opening
// parenthesis has already been consumed). Arguments are parsed with
// parse_next() until the closing parenthesis, which is consumed.
//
// If the tokens run out or an argument fails to parse before the closing
// parenthesis is seen, ERR_UNMATCHED_PAREN is pushed at the operator's
// position and NULL is returned.
Node *
parse_builtin(Parser *parser) {
    Token op = next_token(parser);
    Node *node = alloc_node(NODE_BUILTIN);
    node->builtin.type = op.type;
    array_init(node->builtin.args, 0);
    while (has_next(parser)) {
        Token next = peek_token(parser);
        if (next.type == TOKEN_RPAREN) {
            next_token(parser);
            return node;
        }
        Node *arg = parse_next(parser);
        if (arg == NULL) {
            break;
        }
        array_push(node->builtin.args, arg);
    }
    push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, op.line, op.col);
    return NULL;
}

// Parse the remainder of a `def` form: keyword, symbol, type annotation,
// value expression and the closing parenthesis. Returns a NODE_DEF, or NULL
// as soon as any component fails to parse.
Node *
parse_def(Parser *parser) {
    next_token(parser); // Skip keyword.

    Node *symbol = parse_symbol(parser);
    if (symbol == NULL) {
        return NULL;
    }

    // TODO: Making type checking mandatory for now until we introduce
    // type inference.
    Node *type = parse_type(parser);
    if (type == NULL) {
        return NULL;
    }

    Node *value = parse_next(parser);
    if (value == NULL) {
        return NULL;
    }

    if (!consume_rparen(parser)) {
        return NULL;
    }

    Node *node = alloc_node(NODE_DEF);
    node->def.symbol = symbol;
    node->def.value = value;
    node->def.type = type;
    return node;
}

// Parse the remainder of a `set` form: keyword, symbol, value expression and
// the closing parenthesis. Returns a NODE_SET, or NULL as soon as any
// component fails to parse.
Node *
parse_set(Parser *parser) {
    next_token(parser); // Skip keyword.

    Node *symbol = parse_symbol(parser);
    if (symbol == NULL) {
        return NULL;
    }

    Node *value = parse_next(parser);
    if (value == NULL) {
        return NULL;
    }

    if (!consume_rparen(parser)) {
        return NULL;
    }

    Node *node = alloc_node(NODE_SET);
    node->set.symbol = symbol;
    node->set.value = value;
    return node;
}

// Parse the remainder of a `fun` form: keyword, function name, a
// parenthesised list of `name :type` parameters, the return type annotation,
// and zero or more body expressions up to the closing parenthesis.
// Returns a NODE_FUN, or NULL as soon as any component fails to parse.
//
// NOTE(review): both loops peek without calling has_next(); they appear to
// rely on the token array ending with a token that makes parse_symbol() /
// parse_next() fail (such as TOKEN_EOF) -- confirm with the lexer.
Node *
parse_fun(Parser *parser) {
    next_token(parser); // Skip keyword.

    Node *name = parse_symbol(parser);
    if (name == NULL) {
        return NULL;
    }

    Node *node = alloc_node(NODE_FUN);
    node->fun.name = name;
    array_init(node->fun.param_names, 0);
    array_init(node->fun.param_types, 0);
    array_init(node->fun.body, 0);

    // Parse parameter list and return type.
    if (!consume_lparen(parser)) {
        return NULL;
    }
    while (true) {
        Token next = peek_token(parser);
        if (next.type == TOKEN_RPAREN) {
            next_token(parser);
            break;
        }
        Node *param_name = parse_symbol(parser);
        if (param_name == NULL) {
            return NULL;
        }
        Node *param_type = parse_type(parser);
        if (param_type == NULL) {
            return NULL;
        }
        array_push(node->fun.param_names, param_name);
        array_push(node->fun.param_types, param_type);
    }
    Node *ret_type = parse_type(parser);
    if (ret_type == NULL) {
        return NULL;
    }
    node->fun.return_type = ret_type;

    // Parse body.
    while (true) {
        Token next = peek_token(parser);
        if (next.type == TOKEN_RPAREN) {
            next_token(parser);
            break;
        }
        Node *expr = parse_next(parser);
        if (expr == NULL) {
            return NULL;
        }
        array_push(node->fun.body, expr);
    }

    return node;
}

// Parse a parenthesised form. Consumes the opening parenthesis, then
// dispatches on the token that follows it. Anything that is neither a builtin
// operator nor a special form pushes ERR_UNIMPLEMENTED and returns NULL.
Node *
parse_paren(Parser *parser) {
    next_token(parser); // Skip paren.

    Token tok = peek_token(parser);
    switch (tok.type) {
        // Builtin functions.
        case TOKEN_ADD:
        case TOKEN_SUB:
        case TOKEN_MUL:
        case TOKEN_DIV:
        case TOKEN_MOD:
        case TOKEN_NOT:
        case TOKEN_AND:
        case TOKEN_OR:
            return parse_builtin(parser);
        // Special functions.
        case TOKEN_DEF:
            return parse_def(parser);
        case TOKEN_SET:
            return parse_set(parser);
        case TOKEN_FUN:
            return parse_fun(parser);
        default:
            break;
    }

    // TODO: Lookup value on symbol table.
    push_error(ERR_TYPE_PARSER, ERR_UNIMPLEMENTED, tok.line, tok.col);
    return NULL;
}

// TODO: Parse if: (def ...) shouldn't be allowed for now on if statements. In
// the future, an if can create a new block and thus a fresh scope which would
// make the decision of what variables are active trivial.
// Parse one expression starting at the cursor, choosing the sub-parser from
// the type of the upcoming token (which is only peeked here; the sub-parser
// consumes it).
//
// Returns the parsed node, or NULL when:
//   - the upcoming token is TOKEN_EOF (no error is pushed), or
//   - the token type is not handled here (ERR_UNKNOWN_TOK_TYPE is pushed), or
//   - the selected sub-parser itself returned NULL.
Node *
parse_next(Parser *parser) {
    Token upcoming = peek_token(parser);
    Node *result = NULL;

    switch (upcoming.type) {
        case TOKEN_NUMBER:
            result = parse_number(parser);
            break;
        case TOKEN_STRING:
            result = parse_string(parser);
            break;
        case TOKEN_SYMBOL:
            result = parse_symbol(parser);
            break;
        case TOKEN_TRUE:
        case TOKEN_FALSE:
            result = parse_bool(parser);
            break;
        case TOKEN_LPAREN:
            result = parse_paren(parser);
            break;
        case TOKEN_EOF:
            // End of input: leave the result as NULL, no error.
            break;
        default:
            push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE,
                       upcoming.line, upcoming.col);
            break;
    }

    return result;
}

// Entry point: dump every token in `tokens`, then parse expressions one after
// another, printing each resulting node followed by a newline. Parsing stops
// when the tokens are exhausted or as soon as parse_next() returns NULL.
void
parse(Token *tokens) {
    Parser parser = {
        .tokens = tokens,
        .current = 0,
    };

    // DEBUG: TOKENS
    printf("-- tokens --\n");
    size_t idx = 0;
    while (idx < array_size(tokens)) {
        print_token(tokens[idx]);
        idx++;
    }
    printf("------------\n");

    while (has_next(&parser)) {
        Node *root = parse_next(&parser);
        if (root == NULL) {
            break;
        }
        print_node(root);
        printf("\n");
    }
}