#include "parser.h"
#include "darray.h"

// Allocate a new node and tag it with `type`. The remaining fields are left
// uninitialized; the caller is expected to fill in the ones matching `type`.
// Returns NULL if the allocation fails.
Node *
alloc_node(NodeType type) {
    // TODO: Use a bump allocator?
    // TODO: Free memory!
    Node *node = malloc(sizeof *node);
    if (node == NULL) {
        return NULL;
    }
    node->type = type;
    return node;
}

// Report whether there are still unread tokens in the parser's token array.
bool
has_next(Parser *parser) {
    return parser->current < array_size(parser->tokens);
}

// Return the current token and advance the cursor. If the cursor is already
// past the last token, a zero-initialized TOKEN_EOF token is returned and the
// cursor is left untouched, so callers never read outside the token array.
Token
next_token(Parser *parser) {
    if (!has_next(parser)) {
        return (Token){ .type = TOKEN_EOF };
    }
    return parser->tokens[parser->current++];
}

// Return the current token without consuming it. Past the end of the token
// array a zero-initialized TOKEN_EOF token is returned instead.
Token
peek_token(Parser *parser) {
    if (!has_next(parser)) {
        return (Token){ .type = TOKEN_EOF };
    }
    return parser->tokens[parser->current];
}

// Decode a single digit character for the given base (2, 10 or 16).
// Returns the digit's value, or -1 if `c` is not a valid digit in that base.
static int
digit_value(char c, int base) {
    int value = -1;
    if (c >= '0' && c <= '9') {
        value = c - '0';
    } else if (c >= 'a' && c <= 'f') {
        value = (c - 'a') + 10;
    } else if (c >= 'A' && c <= 'F') {
        value = (c - 'A') + 10;
    }
    // Rejects e.g. '2' in binary and 'a' in decimal.
    return value < base ? value : -1;
}

// Parse a TOKEN_NUMBER token into a NODE_NUMBER node.
//
// Accepted shape: an optional single sign ('-' or '+'), an optional base
// prefix ("0x" for hexadecimal, "0b" for binary), the integral digits and
// optionally a '.' followed by the fractional digits.
//
// Returns NULL after pushing ERR_MALFORMED_NUMBER when an invalid character
// is found, or NULL without pushing an error if the node allocation fails.
Node *
parse_number(Parser *parser) {
    Token tok = next_token(parser);
    bool negative = false;
    int base = 10;

    // The loops below rely on sv_next() yielding '\0' once the token text is
    // exhausted.
    char c = sv_next(&tok.value);

    // At most one sign character is allowed.
    if (c == '-') {
        negative = true;
        c = sv_next(&tok.value);
    } else if (c == '+') {
        c = sv_next(&tok.value);
    }

    // Optional base prefix. A leading zero that is not part of a prefix
    // needs no special handling: it contributes nothing to the value and
    // whatever follows it ('\0', '.', a digit or garbage) is dealt with by
    // the integral loop. This is what makes "0" and "0.5" valid.
    if (c == '0') {
        c = sv_next(&tok.value);
        if (c == 'x') {
            base = 16;
            c = sv_next(&tok.value);
        } else if (c == 'b') {
            base = 2;
            c = sv_next(&tok.value);
        }
    }

    // Integral part.
    u64 integral = 0;
    while (c != '\0') {
        if (c == '.') {
            c = sv_next(&tok.value);
            break;
        }
        int digit = digit_value(c, base);
        if (digit < 0) {
            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
            return NULL;
        }
        integral = integral * (u64)base + (u64)digit;
        c = sv_next(&tok.value);
    }

    // Fractional part. A second '.' is not a digit and is therefore rejected.
    // NOTE(review): the fraction is accumulated as a plain integer, so
    // leading zeros are lost ("1.05" yields fractional == 5). Fixing that
    // needs a change to the number representation in Node.
    u64 fractional = 0;
    while (c != '\0') {
        int digit = digit_value(c, base);
        if (digit < 0) {
            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
            return NULL;
        }
        fractional = fractional * (u64)base + (u64)digit;
        c = sv_next(&tok.value);
    }

    Node *node = alloc_node(NODE_NUMBER);
    if (node == NULL) {
        return NULL;
    }
    node->number.negative = negative;
    node->number.integral = integral;
    node->number.fractional = fractional;
    return node;
}

// Consume one token and wrap its text in a NODE_STRING node.
// Returns NULL if the node allocation fails.
Node *
parse_string(Parser *parser) {
    Token tok = next_token(parser);
    Node *node = alloc_node(NODE_STRING);
    if (node == NULL) {
        return NULL;
    }
    node->string = tok.value;
    return node;
}

// Consume one token and wrap its text in a NODE_SYMBOL node.
// Returns NULL if the node allocation fails.
Node *
parse_symbol(Parser *parser) {
    Token tok = next_token(parser);
    Node *node = alloc_node(NODE_SYMBOL);
    if (node == NULL) {
        return NULL;
    }
    node->string = tok.value;
    return node;
}

// Consume one token and produce a NODE_BOOL node that is true only when the
// token type is TOKEN_TRUE. Returns NULL if the node allocation fails.
Node *
parse_bool(Parser *parser) {
    Token tok = next_token(parser);
    Node *node = alloc_node(NODE_BOOL);
    if (node == NULL) {
        return NULL;
    }
    node->boolean = tok.type == TOKEN_TRUE;
    return node;
}

// Parse a builtin call. The opening parenthesis has already been consumed;
// the current token is the operator. Arguments are parsed until the matching
// TOKEN_RPAREN, which is consumed as well.
//
// Returns the NODE_BUILTIN node, or NULL when an argument fails to parse or
// the tokens run out before the closing parenthesis (ERR_UNMATCHED_PAREN is
// pushed in both cases), or when the node allocation fails.
Node *
parse_builtin(Parser *parser) {
    Token op = next_token(parser);
    Node *node = alloc_node(NODE_BUILTIN);
    if (node == NULL) {
        return NULL;
    }
    node->builtin.type = op.type;
    array_init(node->builtin.args, 0);
    while (has_next(parser)) {
        Token next = peek_token(parser);
        if (next.type == TOKEN_RPAREN) {
            next_token(parser);
            return node;
        }
        Node *arg = parse_next(parser);
        if (arg == NULL) {
            break;
        }
        array_push(node->builtin.args, arg);
    }
    push_error(ERR_TYPE_PARSER, ERR_UNMATCHED_PAREN, op.line, op.col);
    return NULL;
}

// Parse a definition of the form `(def <symbol> <value>)`. The opening
// parenthesis has already been consumed; the current token is `def`.
//
// Returns the NODE_DEF node. Returns NULL after pushing ERR_MALFORMED_EXPR
// when the name is not a symbol, the value is missing or the closing
// parenthesis is absent, and NULL without an extra error if the node
// allocation fails.
Node *
parse_def(Parser *parser) {
    Token op = next_token(parser);

    Node *symbol = parse_next(parser);
    if (symbol == NULL || symbol->type != NODE_SYMBOL) {
        push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col);
        return NULL;
    }

    // TODO: Check if it has type annotation.

    Node *value = parse_next(parser);
    if (value == NULL) {
        push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col);
        return NULL;
    }

    Token end = next_token(parser);
    if (end.type != TOKEN_RPAREN) {
        push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col);
        return NULL;
    }

    Node *node = alloc_node(NODE_DEF);
    if (node == NULL) {
        return NULL;
    }
    node->def.symbol = symbol;
    node->def.value = value;

    // TODO: Register variable in symbol table.
    return node;
}

// Parse a parenthesized expression. Consumes the opening parenthesis and
// dispatches on the token that follows it. Anything that is neither a
// builtin operator nor `def` is reported as ERR_UNIMPLEMENTED.
Node *
parse_paren(Parser *parser) {
    next_token(parser); // Skip paren.

    Token tok = peek_token(parser);

    switch (tok.type) {
        // Builtin functions.
        case TOKEN_ADD:
        case TOKEN_SUB:
        case TOKEN_MUL:
        case TOKEN_DIV:
        case TOKEN_MOD:
        case TOKEN_NOT:
        case TOKEN_AND:
        case TOKEN_OR: { return parse_builtin(parser); } break;
        // Special functions.
        case TOKEN_DEF: { return parse_def(parser); } break;
        default: break;
    }

    // TODO: Lookup value on symbol table.

    push_error(ERR_TYPE_PARSER, ERR_UNIMPLEMENTED, tok.line, tok.col);
    return NULL;
}

// Parse the next expression, dispatching on the type of the current token.
// Returns NULL on TOKEN_EOF (no error pushed, token not consumed), on an
// unknown token type (ERR_UNKNOWN_TOK_TYPE pushed) or when a sub-parser
// fails.
Node *
parse_next(Parser *parser) {
    Token tok = peek_token(parser);
    switch (tok.type) {
        case TOKEN_NUMBER: { return parse_number(parser); } break;
        case TOKEN_STRING: { return parse_string(parser); } break;
        case TOKEN_SYMBOL: { return parse_symbol(parser); } break;
        case TOKEN_TRUE:
        case TOKEN_FALSE: { return parse_bool(parser); } break;
        case TOKEN_LPAREN: { return parse_paren(parser); } break;
        case TOKEN_EOF: { return NULL; } break;
        default: {
            push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col);
            return NULL;
        } break;
    }
}

// Write a textual representation of `node` (and, recursively, its children)
// to stdout. Node types without a dedicated case are printed as "{#unk}".
void
print_node(Node *node) {
    switch (node->type) {
        case NODE_NUMBER: {
            if (node->number.negative) {
                printf("-");
            }
            // The values are cast so that the argument type always matches
            // the conversion specifier, whatever u64 is a typedef of.
            if (node->number.fractional != 0) {
                printf("%llu.%llu",
                       (unsigned long long)node->number.integral,
                       (unsigned long long)node->number.fractional);
            } else {
                printf("%llu", (unsigned long long)node->number.integral);
            }
        } break;
        case NODE_SYMBOL:
        case NODE_STRING: {
            sv_write(&node->string);
        } break;
        case NODE_BOOL: {
            if (node->boolean) {
                printf("true");
            } else {
                printf("false");
            }
        } break;
        case NODE_BUILTIN: {
            printf("(");
            printf("{#%s}", token_str[node->builtin.type]);
            size_t n_args = array_size(node->builtin.args);
            if (n_args != 0) {
                printf(" ");
            }
            for (size_t i = 0; i < n_args; ++i) {
                print_node(node->builtin.args[i]);
                // Separator between arguments, but not after the last one.
                if (i < n_args - 1) {
                    printf(" ");
                }
            }
            printf(")");
        } break;
        case NODE_DEF: {
            printf("(def ");
            print_node(node->def.symbol);
            printf(" ");
            print_node(node->def.value);
            printf(")");
        } break;
        default: { printf("{#unk}"); } break;
    }
}

// Parse the token array `tokens` expression by expression, printing every
// successfully parsed top-level node on its own line. Parsing stops at the
// first expression for which parse_next() returns NULL.
void
parse(Token *tokens) {
    Parser parser = {
        .tokens = tokens,
        .current = 0,
    };

    // DEBUG: TOKENS
    printf("-- tokens --\n");
    for (size_t i = 0; i < array_size(tokens); i++) {
        print_token(tokens[i]);
    }
    printf("------------\n");

    while (has_next(&parser)) {
        Node *node = parse_next(&parser);
        if (node == NULL) {
            return;
        }
        print_node(node);
        printf("\n");
    }
}