From fa32ad3224b3e362e5f79eee8785334f4bebdbc8 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sat, 12 Feb 2022 19:06:09 +0100 Subject: Add boilerplate for parser --- src/lexer.c | 62 +--- src/main.c | 11 +- src/parser.c | 968 +++++++---------------------------------------------------- src/parser.h | 188 +++--------- 4 files changed, 154 insertions(+), 1075 deletions(-) (limited to 'src') diff --git a/src/lexer.c b/src/lexer.c index 5175b1c..f63ff4f 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -141,67 +141,6 @@ is_delimiter(char c) { return false; } -size_t -scan_number_token(Scanner *scanner) { - // TODO: This looks like more a parsing problem than lexer, - // consider moving it there. If starts with `-` and there is no - // delimiter after, or if it starts with a number, it is - // TOKEN_NUMBER. - char first = scan_next(scanner); - char second = scan_peek(scanner); - size_t n = 1; - if (first == '0' && !is_delimiter(second)) { - if (second == 'x') { - // Hex constant. - scan_next(scanner); - n++; - if (is_delimiter(scan_peek(scanner))) { - return 0; - } - while (!is_delimiter(scan_peek(scanner))) { - char c = scan_next(scanner); - if (!(c >= '0' && c <= '9') && - !(c >= 'a' && c <= 'f') && - !(c >= 'A' && c <= 'F')) { - return 0; - } - n++; - } - return n; - } else if (second == 'b') { - // Binary constant. - scan_next(scanner); - n++; - if (is_delimiter(scan_peek(scanner))) { - return 0; - } - while (!is_delimiter(scan_peek(scanner))) { - char c = scan_next(scanner); - if (!(c == '0' || c == '1')) { - return 0; - } - n++; - } - } - } - - // Decimal number or floating point. - bool has_dot = false; - while (!is_delimiter(scan_peek(scanner))) { - char c = scan_next(scanner); - if (c == '.') { - if (has_dot) { - return 0; - } - has_dot = true; - } else if (!(c >= '0' && c <= '9')) { - return 0; - } - n++; - } - return n; -} - TokenType find_token_type(const StringView value) { for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { @@ -285,6 +224,7 @@ tokenize(const StringView *sv) { } size_t n = 1; bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); + is_number = c == '+' && !is_delimiter(scan_peek(&scanner)); is_number = is_number || (c >= '0' && c <= '9'); if (is_number) { while (!is_delimiter(scan_peek(&scanner))) { diff --git a/src/main.c b/src/main.c index 31fb5da..7a24895 100644 --- a/src/main.c +++ b/src/main.c @@ -7,7 +7,7 @@ #include "string_view.c" #include "errors.c" #include "lexer.c" -// #include "parser.c" +#include "parser.c" // #include "ir.h" // #include "compiler.h" @@ -25,11 +25,14 @@ void process_source(const StringView *source, const char *file_name) { // Read tokens. Token *tokens = tokenize(source); - print_tokens(tokens); + // print_tokens(tokens); + check_errors(file_name); + + // Parser. + parse(tokens); + // print_program(program); check_errors(file_name); - // // Parser. - // Program program = parse(tokens, &errors); // if (errors.n != 0) { // report_errors(&errors, file_name); // free_objects(); diff --git a/src/parser.c b/src/parser.c index a5e2b42..dfc3e56 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,902 +1,150 @@ #include "parser.h" #include "darray.h" -static Object **objects = NULL; -static Root *roots = NULL; -static Environment **environments = NULL; - -// Builtin procedures. -static Object builtins[] = { - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_ADD, .builtin_text = STRING("+") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_SUB, .builtin_text = STRING("-") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MUL, .builtin_text = STRING("*") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_DIV, .builtin_text = STRING("/") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MOD, .builtin_text = STRING("%") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_EQ, .builtin_text = STRING("=") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LT, .builtin_text = STRING("<") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GT, .builtin_text = STRING(">") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LE, .builtin_text = STRING("<=") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GE, .builtin_text = STRING(">=") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_NOT, .builtin_text = STRING("not") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_AND, .builtin_text = STRING("and") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_OR, .builtin_text = STRING("or") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_NIL, .builtin_text = STRING("nil?") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_ZERO, .builtin_text = STRING("zero?") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_FIXNUM, .builtin_text = STRING("fixnum?") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_BOOL, .builtin_text = STRING("bool?") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_PRINT, .builtin_text = STRING("print") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CONS, .builtin_text = STRING("cons") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CAR, .builtin_text = STRING("car") }, - { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CDR, .builtin_text = STRING("cdr") }, -}; - -// Static singleton objects. -static Object obj_nil = { .type = OBJ_TYPE_NIL }; -static Object obj_true = { .type = OBJ_TYPE_TRUE }; -static Object obj_false = { .type = OBJ_TYPE_FALSE }; - -Token -peek_token(const Parser *parser) { - if (parser->current >= array_size(parser->tokens)) { - return parser->tokens[array_size(parser->tokens) - 1]; - } - return parser->tokens[parser->current]; -} - Token next_token(Parser *parser) { - if (parser->current >= array_size(parser->tokens)) { - return parser->tokens[array_size(parser->tokens) - 1]; - } return parser->tokens[parser->current++]; } Token -previous_token(Parser *parser) { - return parser->tokens[parser->current - 1]; -} - -Token -rewind_token(Parser *parser) { - return parser->tokens[--parser->current]; +peek_token(Parser *parser) { + return parser->tokens[parser->current]; } bool -has_next_token(const Parser *parser) { - return parser->current < array_size(parser->tokens) - && peek_token(parser).type != TOKEN_EOF; -} - -Object * -parse_fixnum(Token tok) { - ssize_t num = 0; - ssize_t sign = 1; - for (size_t i = 0; i < tok.value.n; i++) { - char c = tok.value.start[i]; - if (c == '-') { - sign = -1; - continue; - } - num = num * 10 + (c - '0'); - } - Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); - ret->fixnum = num * sign; - return ret; -} - -Object * -parse_bool(Token tok) { - if (tok.type == TOKEN_TRUE) { - return &obj_true; - } - return &obj_false; -} - -Object * -parse_string(Token tok) { - Object *ret = object_alloc(tok, OBJ_TYPE_STRING); - ret->text = tok.value; - return ret; -} - -Object * -parse_symbol(Token tok) { - // Check if symbol is a builtin procedure. - size_t n_builtins = sizeof(builtins) / sizeof(Object); - for (size_t i = 0; i < n_builtins; ++i) { - if (sv_equal(&tok.value, &builtins[i].builtin_text)) { - return &builtins[i]; - } - } - - Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); - ret->text = tok.value; - return ret; +has_next(Parser *parser) { + return parser->current < array_size(parser->tokens); } -Object * -parse_lambda(Parser *parser, Errors *errors) { - Token start = next_token(parser); - Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); - array_init(lambda->params, 0); - array_init(lambda->body, 0); - lambda->env = NULL; - - // Parse parameters. +Node +parse_number(Parser *parser) { Token tok = next_token(parser); - if (tok.type == TOKEN_LPAREN) { - while (has_next_token(parser)) { - Token tok = next_token(parser); - if (tok.type == TOKEN_RPAREN) { - break; - } - if (tok.type != TOKEN_SYMBOL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_WRONG_ARG_TYPE, - .line = tok.line, - .col = tok.col, - }); - } - Object *symbol = parse_symbol(tok); - array_push(lambda->params, symbol); - } - } else if (tok.type != TOKEN_NIL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_WRONG_ARG_TYPE, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - - // Parse body. - bool done = false; - while (has_next_token(parser)) { - Token tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - next_token(parser); - done = true; - break; - } - Object *expr = parse_tree(parser, errors); - array_push(lambda->body, expr); - } - if (!done) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - if (array_size(lambda->body) == 0) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = start.line, - .col = start.col, - }); - return NULL; - } - return lambda; -} - -Object * -parse_if(Parser *parser, Errors *errors) { - Token start = next_token(parser); - Object *ret = object_alloc(start, OBJ_TYPE_IF); - - Token tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - ret->condition = parse_tree(parser, errors); - - tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - ret->expr_true = parse_tree(parser, errors); - - // Optional else expression. - tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - next_token(parser); - ret->expr_false = NULL; - return ret; - } - ret->expr_false = parse_tree(parser, errors); - - tok = peek_token(parser); - if (tok.type == TOKEN_EOF) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - if (tok.type != TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_TOO_MANY_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - next_token(parser); - return ret; -} - -Object * -parse_var(Parser *parser, Errors *errors) { - Token start = next_token(parser); - ObjectType type = start.type == TOKEN_DEF ? OBJ_TYPE_DEF : OBJ_TYPE_SET; - Object *ret = object_alloc(start, type); - - // Variable name. - Token tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - if (tok.type != TOKEN_SYMBOL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_WRONG_ARG_TYPE, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - ret->var_name = parse_tree(parser, errors); - - // Variable value (expression). - tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - ret->var_expr = parse_tree(parser, errors); - - tok = peek_token(parser); - if (tok.type == TOKEN_EOF) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - if (tok.type != TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_TOO_MANY_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - next_token(parser); - return ret; -} - -Object * -parse_fun(Parser *parser, Errors *errors) { - Token start = next_token(parser); - Object *ret = object_alloc(start, OBJ_TYPE_DEF); - - // Variable name. - Token tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - if (tok.type != TOKEN_SYMBOL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_WRONG_ARG_TYPE, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - ret->var_name = parse_tree(parser, errors); - - // Variable value (expression). - rewind_token(parser); - ret->var_expr = parse_lambda(parser, errors); - return ret; -} - -Object * -parse_list(Parser *parser, Errors *errors) { - if (errors->n != 0) { - return NULL; + // TODO: do the dance. + // if error: + // return (Node){.type = NODE_ERR}; + + // size_t + // scan_number_token(Scanner *scanner) { + // // TODO: This looks like more a parsing problem than lexer, + // // consider moving it there. If starts with `-` and there is no + // // delimiter after, or if it starts with a number, it is + // // TOKEN_NUMBER. + // char first = scan_next(scanner); + // char second = scan_peek(scanner); + // size_t n = 1; + // if (first == '0' && !is_delimiter(second)) { + // if (second == 'x') { + // // Hex constant. + // scan_next(scanner); + // n++; + // if (is_delimiter(scan_peek(scanner))) { + // return 0; + // } + // while (!is_delimiter(scan_peek(scanner))) { + // char c = scan_next(scanner); + // if (!(c >= '0' && c <= '9') && + // !(c >= 'a' && c <= 'f') && + // !(c >= 'A' && c <= 'F')) { + // return 0; + // } + // n++; + // } + // return n; + // } else if (second == 'b') { + // // Binary constant. + // scan_next(scanner); + // n++; + // if (is_delimiter(scan_peek(scanner))) { + // return 0; + // } + // while (!is_delimiter(scan_peek(scanner))) { + // char c = scan_next(scanner); + // if (!(c == '0' || c == '1')) { + // return 0; + // } + // n++; + // } + // } + // } + + // // Decimal number or floating point. + // bool has_dot = false; + // while (!is_delimiter(scan_peek(scanner))) { + // char c = scan_next(scanner); + // if (c == '.') { + // if (has_dot) { + // return 0; + // } + // has_dot = true; + // } else if (!(c >= '0' && c <= '9')) { + // return 0; + // } + // n++; + // } + // return n; + // } + return (Node){.type = NODE_NUMBER, .string = 53}; +} + +Node +parse_next(Parser *parser) { + if (!has_next(parser)) { + return; } Token tok = peek_token(parser); switch (tok.type) { - case TOKEN_RPAREN: { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } break; - case TOKEN_LAMBDA: { return parse_lambda(parser, errors); } break; - case TOKEN_IF: { return parse_if(parser, errors); } break; - case TOKEN_DEF: { return parse_var(parser, errors); } break; - case TOKEN_SET: { return parse_var(parser, errors); } break; - case TOKEN_FUN: { return parse_fun(parser, errors); } break; - default: break; - } - - Token start = previous_token(parser); - Object *root = object_alloc(start, OBJ_TYPE_PAIR); - root->head = NULL; - root->tail = NULL; - root->n_elems = 0; - Object *current = root; - bool first = true; - while (has_next_token(parser)) { - Token tok = peek_token(parser); - current->head = parse_tree(parser, errors); - if (errors->n != 0 || current->head == NULL) { - return NULL; - } - - if (first) { - if (!IS_SYMBOL(current->head) && - !IS_LAMBDA(current->head) && - !IS_BUILTIN(current->head)) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_CALLABLE, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } - first = false; - } - - tok = peek_token(parser); - if (tok.type == TOKEN_RPAREN) { - next_token(parser); - return root; - } - - Object *next = object_alloc(start, OBJ_TYPE_PAIR); - next->head = NULL; - next->tail = NULL; - current->tail = next; - current = current->tail; - root->n_elems++; - } - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = start.line, - .col = start.col, - }); - return NULL; -} - -Object * -parse_tree(Parser *parser, Errors *errors) { - Token tok = next_token(parser); - if (errors->n != 0) { - return NULL; - } - switch (tok.type) { - case TOKEN_FIXNUM: { - return parse_fixnum(tok); - } break; - case TOKEN_TRUE: - case TOKEN_FALSE: { - return parse_bool(tok); - } break; - case TOKEN_RPAREN: { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_UNBALANCED_PAREN, - .line = tok.line, - .col = tok.col, - }); - return NULL; - } break; - case TOKEN_LPAREN: { - return parse_list(parser, errors); + case TOKEN_NUMBER: { + return parse_number(parser); } break; case TOKEN_STRING: { - return parse_string(tok); - } break; - case TOKEN_SYMBOL: { - return parse_symbol(tok); + // printf("STRING!\n"); + next_token(parser); // FIXME: <==== + return (Node){.type = NODE_STRING, .string = tok.value}; + // TODO: parse_string(parser); } break; - case TOKEN_NIL: { - return &obj_nil; + case TOKEN_LPAREN: { + // printf("LPAREN OH MY!\n"); + // TODO: parse_list(parser); } break; default: { - break; + // printf("OH OHHHH\n"); + // ... } break; } - return NULL; -} - -ssize_t -find_var_index(Object **vars, Object *symbol) { - for (size_t i = 0; i < array_size(vars); i++) { - if (object_equal(vars[i], symbol)) { - return i; - } - } - return -1; -} - -Object * -symbol_in_env(Environment *env, Object *symbol) { - while (env != NULL) { - ssize_t idx = find_var_index(env->locals, symbol); - if (idx != -1) { - return env->local_values[idx]; - } - idx = find_var_index(env->params, symbol); - if (idx != -1) { - return env->params[idx]; - } - env = env->parent; - } - return NULL; -} - -void -insert_local(Environment *env, Object *symbol, Object *value) { - ssize_t idx = find_var_index(env->locals, symbol); - if (idx != -1) { - env->local_values[idx] = value; - return; - } - array_push(env->locals, symbol); - array_push(env->local_values, value); + next_token(parser); // FIXME: <==== + // TODO: this should be an error } void -insert_params(Environment *env, Object *symbol) { - if (find_var_index(env->params, symbol) != -1) { - return; - } - array_push(env->params, symbol); -} - -void -insert_captured(Environment *env, Object *symbol) { - if (find_var_index(env->captured, symbol) != -1) { - return; - } - array_push(env->captured, symbol); -} - -void -semantic_analysis(Environment *env, Object *obj, Errors *errors) { - if (obj == NULL || obj->visited) { - return; - } - obj->visited = true; - switch (obj->type) { - case OBJ_TYPE_SYMBOL: { - Object *found = NULL; - - Environment *cur_env = env; - while (cur_env != NULL) { - ssize_t idx = find_var_index(cur_env->locals, obj); - if (idx != -1) { - found = cur_env->local_values[idx]; - if (cur_env != env) { - insert_captured(env, obj); - } - break; - } - idx = find_var_index(cur_env->params, obj); - if (idx != -1) { - found = cur_env->params[idx]; - break; - } - cur_env = cur_env->parent; - } - - // Check if symbol is in other environments. - if (found == NULL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_SYMBOL_NOT_FOUND, - .line = obj->line, - .col = obj->col, - }); - return; - } - semantic_analysis(env, found, errors); - } break; - case OBJ_TYPE_DEF: { - insert_local(env, obj->var_name, obj->var_expr); - semantic_analysis(env, obj->var_expr, errors); - } break; - case OBJ_TYPE_SET: { - semantic_analysis(env, obj->var_name, errors); - semantic_analysis(env, obj->var_expr, errors); - } break; - case OBJ_TYPE_IF: { - semantic_analysis(env, obj->condition, errors); - semantic_analysis(env, obj->expr_true, errors); - semantic_analysis(env, obj->expr_false, errors); +print_node(Node node) { + switch (node.type) { + case NODE_NUMBER: { + printf("%ld\n", node.number); } break; - case OBJ_TYPE_PAIR: { - Object *head = obj->head; - if (IS_SYMBOL(head)) { - head = symbol_in_env(env, head); - if (head == NULL) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_SYMBOL_NOT_FOUND, - .line = obj->head->line, - .col = obj->head->col, - }); - return; - } - } - if (IS_LAMBDA(head)) { - if (obj->n_elems != array_size(head->params)) { - error_push(errors, (Error){ - .type = ERR_TYPE_PARSER, - .value = ERR_NOT_ENOUGH_ARGS, - .line = obj->line, - .col = obj->col - }); - return; - } - } - semantic_analysis(env, obj->head, errors); - semantic_analysis(env, obj->tail, errors); + case NODE_STRING: { + sv_write(&node.string); + printf("\n"); } break; - case OBJ_TYPE_LAMBDA: { - // Initialize scope for this lambda. - Environment *new_env = env_alloc(env); - obj->env = new_env; - for (size_t i = 0; i < array_size(obj->params); i++) { - insert_params(obj->env, obj->params[i]); - } - // Used for removing unnecessary statements. - Object **new_body = NULL; - array_init(new_body, 0); - for (size_t i = 0; i < array_size(obj->body); i++) { - Object *expr = obj->body[i]; - if (i != array_size(obj->body) - 1) { - if (IS_FIXNUM(expr) || - IS_STRING(expr) || - IS_SYMBOL(expr) || - IS_LAMBDA(expr) || - IS_BOOL(expr) || - IS_NIL(expr)) { - continue; - } - } - semantic_analysis(obj->env, expr, errors); - array_push(new_body, expr); - } - array_free(obj->body); - obj->body = new_body; + default: { + // ... } break; - default: break; } } -Program -parse(Token *tokens, Errors *errors) { - array_init(roots, 0); - array_init(objects, 0); - array_init(environments, 0); +void +parse(Token *tokens) { Parser parser = { .tokens = tokens, .current = 0, }; - - // Build initial ASTs. This also ensures the core grammar is correct. - while (has_next_token(&parser)) { - Object *root = parse_tree(&parser, errors); - if (errors->n != 0) { - return (Program){0}; + while (has_next(&parser)) { + Node node = parse_next(&parser); + if (node.type == NODE_ERR) { + return; } - array_push(roots, root); - } - - // Prepare global environment. - Environment *env = env_alloc(NULL); - - // Perform semantic analysis: - // 1. Populate symbol tables and ensure symbols are in scope when used. - // 2. Removing unnecessary expressions. - // 3. Verify number of arguments is correct in function calls. - Root *final_roots = NULL; - array_init(final_roots, 0); - for (size_t i = 0; i < array_size(roots); i++) { - Object *root = roots[i]; - if (i != array_size(roots) - 1) { - if (IS_FIXNUM(root) || - IS_STRING(root) || - IS_SYMBOL(root) || - IS_LAMBDA(root) || - IS_BOOL(root) || - IS_NIL(root)) { - continue; - } - } - array_push(final_roots, root); - semantic_analysis(env, root, errors); - if (errors->n != 0) { - array_free(final_roots); - return (Program){0}; - } - } - array_free(roots); - roots = final_roots; - - // TODO: Check if primitive procedures have been given the right number of - // arguments. - // TODO: Type check basic expressions (e.g. arithmetic/numeric comparisons). - // We can't be sure when we have functions unless the return type is known. - - return (Program){roots, env}; -} - -Environment * -env_alloc(Environment *parent) { - Environment *env = malloc(sizeof(Environment)); - env->locals = NULL; - env->local_values = NULL; - env->params = NULL; - array_init(env->locals, 0); - array_init(env->local_values, 0); - array_init(env->params, 0); - array_init(env->captured, 0); - env->parent = parent; - array_push(environments, env); - return env; -} - -Object * -object_alloc(Token tok, ObjectType type) { - Object *node = malloc(sizeof(Object)); - node->line = tok.line; - node->col = tok.col; - node->type = type; - node->visited = false; - array_push(objects, node); - return node; -} - -void -object_free(Object *node) { - if (node == NULL) { - return; - } - if (IS_LAMBDA(node)) { - array_free(node->params); - array_free(node->body); - } - free(node); -} - -void -free_objects(void) { - if (objects != NULL) { - for (size_t i = 0; i < array_size(objects); i++) { - object_free(objects[i]); - } - array_free(objects); - } - if (environments != NULL) { - for (size_t i = 0; i < array_size(environments); i++) { - Environment *env = environments[i]; - array_free(env->locals); - array_free(env->local_values); - array_free(env->params); - array_free(env->captured); - free(env); - } - array_free(environments); - } - array_free(roots); -} - -void -display_pair(Object *obj) { - object_display(obj->head); - if (obj->tail == NULL) { - return; - } - if (IS_PAIR(obj->tail)) { - printf(" "); - display_pair(obj->tail); - } else { - printf(" . "); - object_display(obj->tail); - } -} - -void -object_display(Object *obj) { - if (obj == NULL) { - printf("#{error}"); - return; - } - switch (obj->type) { - case OBJ_TYPE_FIXNUM: { - printf("%zd", obj->fixnum); - } break; - case OBJ_TYPE_TRUE: { - printf("true"); - } break; - case OBJ_TYPE_FALSE: { - printf("false"); - } break; - case OBJ_TYPE_NIL: { - printf("()"); - } break; - case OBJ_TYPE_STRING: { - printf("\"%.*s\"", (int)obj->text.n, obj->text.start); - } break; - case OBJ_TYPE_SYMBOL: { - printf(":%.*s", (int)obj->text.n, obj->text.start); - } break; - case OBJ_TYPE_PAIR: { - printf("("); - display_pair(obj); - printf(")"); - } break; - case OBJ_TYPE_LAMBDA: { - printf("#{ lambda ( "); - for (size_t i = 0; i < array_size(obj->params); i++) { - object_display(obj->params[i]); - printf(" "); - } - printf(") "); - for (size_t i = 0; i < array_size(obj->body); i++) { - object_display(obj->body[i]); - printf(" "); - } - printf("}"); - } break; - case OBJ_TYPE_IF: { - printf("#{ if "); - object_display(obj->condition); - printf(" "); - object_display(obj->expr_true); - if (obj->expr_false != NULL) { - printf(" "); - object_display(obj->expr_false); - } - printf(" }"); - } break; - case OBJ_TYPE_DEF: { - printf("#{ def "); - object_display(obj->var_name); - printf(" "); - object_display(obj->var_expr); - printf(" }"); - } break; - case OBJ_TYPE_SET: { - printf("#{ set! "); - object_display(obj->var_name); - printf(" "); - object_display(obj->var_expr); - printf(" }"); - } break; - case OBJ_TYPE_BUILTIN: { - printf("%.*s", (int)obj->builtin_text.n, obj->builtin_text.start); - } break; - } - return; -} - -bool -object_equal(Object *a, Object *b) { - if (a == NULL || b == NULL || a->type != b->type) { - return false; - } - switch (a->type) { - case OBJ_TYPE_TRUE: - case OBJ_TYPE_FALSE: { - return true; - } break; - case OBJ_TYPE_FIXNUM: { - return a->fixnum == b->fixnum; - } break; - case OBJ_TYPE_SYMBOL: - case OBJ_TYPE_STRING: { - return sv_equal(&a->text, &b->text); - } break; - case OBJ_TYPE_BUILTIN: { - return a->builtin == b->builtin; - } break; - case OBJ_TYPE_PAIR: { - Object *a_head = a->head; - Object *b_head = b->head; - if (!object_equal(a_head, b_head)) { - return false; - } - Object *a_tail = a->tail; - Object *b_tail = b->tail; - while (a_tail != NULL && b_tail != NULL) { - if (!object_equal(a_head, b_head)) { - return false; - } - a_head = a_tail->head; - b_head = b_tail->head; - a_tail = a_tail->tail; - b_tail = b_tail->tail; - } - if (a_tail == b_tail && object_equal(a_head, b_head)) { - return true; - } - return false; - } break; - case OBJ_TYPE_LAMBDA: { - size_t n_params_a = array_size(a->params); - size_t n_params_b = array_size(b->params); - size_t n_expr_a = array_size(a->body); - size_t n_expr_b = array_size(b->body); - if (n_params_a != n_params_b || n_expr_a != n_expr_b) { - return false; - } - for (size_t i = 0; i < n_params_a; ++i) { - if (!object_equal(a->params[i], b->params[i])) { - return false; - } - } - for (size_t i = 0; i < n_expr_a; ++i) { - if (!object_equal(a->body[i], b->body[i])) { - return false; - } - } - return true; - } break; - default: break; + print_node(node); + // Token tok = next_token(&parser); + // print_token(tok); } - return false; } diff --git a/src/parser.h b/src/parser.h index 2604be4..3e016d3 100644 --- a/src/parser.h +++ b/src/parser.h @@ -3,162 +3,50 @@ #include "lexer.h" -typedef struct Environment { - struct Object **locals; - struct Object **local_values; - struct Object **params; - struct Object **captured; - struct Environment *parent; -} Environment; - -typedef enum ObjectType { - OBJ_TYPE_NIL, - OBJ_TYPE_TRUE, - OBJ_TYPE_FALSE, - OBJ_TYPE_FIXNUM, - OBJ_TYPE_SYMBOL, - OBJ_TYPE_STRING, - OBJ_TYPE_PAIR, - OBJ_TYPE_LAMBDA, - OBJ_TYPE_IF, - OBJ_TYPE_DEF, - OBJ_TYPE_SET, - OBJ_TYPE_BUILTIN, -} ObjectType; - -typedef enum Builtin { - BUILTIN_ADD, - BUILTIN_SUB, - BUILTIN_MUL, - BUILTIN_DIV, - BUILTIN_MOD, - BUILTIN_EQ, - BUILTIN_LT, - BUILTIN_GT, - BUILTIN_LE, - BUILTIN_GE, - BUILTIN_NOT, - BUILTIN_AND, - BUILTIN_OR, - BUILTIN_IS_NIL, - BUILTIN_IS_ZERO, - BUILTIN_IS_FIXNUM, - BUILTIN_IS_BOOL, - BUILTIN_PRINT, - BUILTIN_CONS, - BUILTIN_CAR, - BUILTIN_CDR, -} Builtin; - -typedef struct Object { - ObjectType type; - union { - // OBJ_TYPE_FIXNUM - ssize_t fixnum; - - // OBJ_TYPE_STRING - // OBJ_TYPE_SYMBOL - StringView text; - - // OBJ_TYPE_PAIR - struct { - struct Object *head; - struct Object *tail; - size_t n_elems; - }; - - // OBJ_TYPE_LAMBDA - struct { - struct Object **params; - struct Object **body; - Environment *env; - }; - - // OBJ_TYPE_IF - struct { - struct Object *condition; - struct Object *expr_true; - struct Object *expr_false; - }; - - // OBJ_TYPE_DEF - // OBJ_TYPE_SET - struct { - struct Object *var_name; - struct Object *var_expr; - }; - - // OBJ_TYPE_BUILTIN - struct { - Builtin builtin; - StringView builtin_text; - }; - }; - - bool visited; - size_t line; - size_t col; -} Object; - typedef struct Parser { Token *tokens; size_t current; } Parser; -typedef Object* Root; - -typedef struct Program { - Root *roots; - Environment *env; -} Program; - -// Token scanner. -Token next_token(Parser *parser); -Token previous_token(Parser *parser); -Token rewind_token(Parser *parser); -Token peek_token(const Parser *parser); -bool has_next_token(const Parser *parser); - -// Parsing operations. -Object * parse_tree(Parser *parser, Errors *errors); -Object * parse_symbol(Token tok); -Object * parse_string(Token tok); -Object * parse_bool(Token tok); -Object * parse_fixnum(Token tok); -Object * parse_list(Parser *parser, Errors *errors); -Object * parse_lambda(Parser *parser, Errors *errors); -Object * parse_if(Parser *parser, Errors *errors); -Object * parse_var(Parser *parser, Errors *errors); -Program parse(Token *tokens, Errors *errors); +typedef enum NodeType { + NODE_ERR, + // NODE_FUNCALL, + // NODE_U64, + // NODE_U32, + // NODE_U16, + // NODE_U8, + // NODE_S64, + // NODE_S32, + // NODE_S16, + // NODE_S8, + NODE_NUMBER, + NODE_STRING, +} NodeType; + +typedef struct Node { + NodeType type; -// Object operations. -void object_display(Object *obj); -bool object_equal(Object *a, Object *b); - -// Manage resources. -Environment * env_alloc(Environment *parent); -Object * object_alloc(Token tok, ObjectType type); -void object_free(Object *node); -void free_objects(void); - -// -// Helper macros. -// - -// Type checking. -#define IS_NIL(VAL) ((VAL)->type == OBJ_TYPE_NIL) -#define IS_TRUE(VAL) ((VAL)->type != OBJ_TYPE_FALSE) -#define IS_FALSE(VAL) ((VAL)->type == OBJ_TYPE_FALSE) -#define IS_BOOL(VAL) \ - (((VAL)->type == OBJ_TYPE_FALSE) || ((VAL)->type == OBJ_TYPE_TRUE)) -#define IS_FIXNUM(VAL) ((VAL)->type == OBJ_TYPE_FIXNUM) -#define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING) -#define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL) -#define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR) -#define IS_LAMBDA(VAL) ((VAL)->type == OBJ_TYPE_LAMBDA) -#define IS_BUILTIN(VAL) ((VAL)->type == OBJ_TYPE_BUILTIN) + union { + // Integer numbers. + // u64 as_u64; + // u32 as_u32; + // u16 as_u16; + // u8 as_u8; + // s64 as_s64; + // s32 as_s32; + // s16 as_s16; + // s8 as_s8; + s64 number; + + // String. + StringView string; + // struct { + // u8 *str; + // u64 n; + // } as_str; + }; +} Node; -// Debug. -#define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n"); +void parse(Token *tokens); #endif // BDL_PARSER_H -- cgit v1.2.1