#include "parser.h" #include "darray.h" static Object **objects = NULL; static Root *roots = NULL; static Environment **environments = NULL; // Builtin procedures. static Object builtins[] = { { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_ADD, .builtin_text = STRING("+") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_SUB, .builtin_text = STRING("-") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MUL, .builtin_text = STRING("*") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_DIV, .builtin_text = STRING("/") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MOD, .builtin_text = STRING("%") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_EQ, .builtin_text = STRING("=") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LT, .builtin_text = STRING("<") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GT, .builtin_text = STRING(">") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LE, .builtin_text = STRING("<=") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GE, .builtin_text = STRING(">=") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_NOT, .builtin_text = STRING("not") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_AND, .builtin_text = STRING("and") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_OR, .builtin_text = STRING("or") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_NIL, .builtin_text = STRING("nil?") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_ZERO, .builtin_text = STRING("zero?") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_FIXNUM, .builtin_text = STRING("fixnum?") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_BOOL, .builtin_text = STRING("bool?") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_PRINT, .builtin_text = STRING("print") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CONS, .builtin_text = STRING("cons") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CAR, .builtin_text = STRING("car") }, { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CDR, .builtin_text = STRING("cdr") }, }; // Static singleton objects. static Object obj_nil = { .type = OBJ_TYPE_NIL }; static Object obj_true = { .type = OBJ_TYPE_TRUE }; static Object obj_false = { .type = OBJ_TYPE_FALSE }; Token peek_token(const Parser *parser) { if (parser->current >= array_size(parser->tokens)) { return parser->tokens[array_size(parser->tokens) - 1]; } return parser->tokens[parser->current]; } Token next_token(Parser *parser) { if (parser->current >= array_size(parser->tokens)) { return parser->tokens[array_size(parser->tokens) - 1]; } return parser->tokens[parser->current++]; } Token previous_token(Parser *parser) { return parser->tokens[parser->current - 1]; } Token rewind_token(Parser *parser) { return parser->tokens[--parser->current]; } bool has_next_token(const Parser *parser) { return parser->current < array_size(parser->tokens) && peek_token(parser).type != TOKEN_EOF; } Object * parse_fixnum(Token tok) { ssize_t num = 0; ssize_t sign = 1; for (size_t i = 0; i < tok.value.n; i++) { char c = tok.value.start[i]; if (c == '-') { sign = -1; continue; } num = num * 10 + (c - '0'); } Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); ret->fixnum = num * sign; return ret; } Object * parse_bool(Token tok) { if (tok.type == TOKEN_TRUE) { return &obj_true; } return &obj_false; } Object * parse_string(Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_STRING); ret->text = tok.value; return ret; } Object * parse_symbol(Token tok) { // Check if symbol is a builtin procedure. size_t n_builtins = sizeof(builtins) / sizeof(Object); for (size_t i = 0; i < n_builtins; ++i) { if (sv_equal(&tok.value, &builtins[i].builtin_text)) { return &builtins[i]; } } Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); ret->text = tok.value; return ret; } Object * parse_lambda(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); array_init(lambda->params, 0); array_init(lambda->body, 0); lambda->env = NULL; // Parse parameters. Token tok = next_token(parser); if (tok.type == TOKEN_LPAREN) { while (has_next_token(parser)) { Token tok = next_token(parser); if (tok.type == TOKEN_RPAREN) { break; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); } Object *symbol = parse_symbol(tok); array_push(lambda->params, symbol); } } else if (tok.type != TOKEN_NIL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } // Parse body. bool done = false; while (has_next_token(parser)) { Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); done = true; break; } Object *expr = parse_tree(parser, errors); array_push(lambda->body, expr); } if (!done) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (array_size(lambda->body) == 0) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.col, }); return NULL; } return lambda; } Object * parse_if(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *ret = object_alloc(start, OBJ_TYPE_IF); Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->condition = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->expr_true = parse_tree(parser, errors); // Optional else expression. tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); ret->expr_false = NULL; return ret; } ret->expr_false = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_EOF) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_TOO_MANY_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } next_token(parser); return ret; } Object * parse_var(Parser *parser, Errors *errors) { Token start = next_token(parser); ObjectType type = start.type == TOKEN_DEF ? OBJ_TYPE_DEF : OBJ_TYPE_SET; Object *ret = object_alloc(start, type); // Variable name. Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_name = parse_tree(parser, errors); // Variable value (expression). tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_expr = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_EOF) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_TOO_MANY_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } next_token(parser); return ret; } Object * parse_fun(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *ret = object_alloc(start, OBJ_TYPE_DEF); // Variable name. Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_name = parse_tree(parser, errors); // Variable value (expression). rewind_token(parser); ret->var_expr = parse_lambda(parser, errors); return ret; } Object * parse_list(Parser *parser, Errors *errors) { if (errors->n != 0) { return NULL; } Token tok = peek_token(parser); switch (tok.type) { case TOKEN_RPAREN: { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } break; case TOKEN_LAMBDA: { return parse_lambda(parser, errors); } break; case TOKEN_IF: { return parse_if(parser, errors); } break; case TOKEN_DEF: { return parse_var(parser, errors); } break; case TOKEN_SET: { return parse_var(parser, errors); } break; case TOKEN_FUN: { return parse_fun(parser, errors); } break; default: break; } Token start = previous_token(parser); Object *root = object_alloc(start, OBJ_TYPE_PAIR); root->head = NULL; root->tail = NULL; root->n_elems = 0; Object *current = root; bool first = true; while (has_next_token(parser)) { Token tok = peek_token(parser); current->head = parse_tree(parser, errors); if (errors->n != 0 || current->head == NULL) { return NULL; } if (first) { if (!IS_SYMBOL(current->head) && !IS_LAMBDA(current->head) && !IS_BUILTIN(current->head)) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_CALLABLE, .line = tok.line, .col = tok.col, }); return NULL; } first = false; } tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); return root; } Object *next = object_alloc(start, OBJ_TYPE_PAIR); next->head = NULL; next->tail = NULL; current->tail = next; current = current->tail; root->n_elems++; } error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.col, }); return NULL; } Object * parse_tree(Parser *parser, Errors *errors) { Token tok = next_token(parser); if (errors->n != 0) { return NULL; } switch (tok.type) { case TOKEN_FIXNUM: { return parse_fixnum(tok); } break; case TOKEN_TRUE: case TOKEN_FALSE: { return parse_bool(tok); } break; case TOKEN_RPAREN: { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } break; case TOKEN_LPAREN: { return parse_list(parser, errors); } break; case TOKEN_STRING: { return parse_string(tok); } break; case TOKEN_SYMBOL: { return parse_symbol(tok); } break; case TOKEN_NIL: { return &obj_nil; } break; default: { break; } break; } return NULL; } ssize_t find_var_index(Object **vars, Object *symbol) { for (size_t i = 0; i < array_size(vars); i++) { if (object_equal(vars[i], symbol)) { return i; } } return -1; } Object * symbol_in_env(Environment *env, Object *symbol) { while (env != NULL) { ssize_t idx = find_var_index(env->locals, symbol); if (idx != -1) { return env->local_values[idx]; } idx = find_var_index(env->params, symbol); if (idx != -1) { return env->params[idx]; } env = env->parent; } return NULL; } void insert_local(Environment *env, Object *symbol, Object *value) { ssize_t idx = find_var_index(env->locals, symbol); if (idx != -1) { env->local_values[idx] = value; return; } array_push(env->locals, symbol); array_push(env->local_values, value); } void insert_params(Environment *env, Object *symbol) { if (find_var_index(env->params, symbol) != -1) { return; } array_push(env->params, symbol); } void insert_captured(Environment *env, Object *symbol) { if (find_var_index(env->captured, symbol) != -1) { return; } array_push(env->captured, symbol); } void semantic_analysis(Environment *env, Object *obj, Errors *errors) { if (obj == NULL || obj->visited) { return; } obj->visited = true; switch (obj->type) { case OBJ_TYPE_SYMBOL: { Object *found = NULL; Environment *cur_env = env; while (cur_env != NULL) { ssize_t idx = find_var_index(cur_env->locals, obj); if (idx != -1) { found = cur_env->local_values[idx]; if (cur_env != env && cur_env->parent != NULL) { insert_captured(env, obj); } break; } idx = find_var_index(cur_env->params, obj); if (idx != -1) { found = cur_env->params[idx]; break; } cur_env = cur_env->parent; } // Check if symbol is in other environments. if (found == NULL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_SYMBOL_NOT_FOUND, .line = obj->line, .col = obj->col, }); return; } semantic_analysis(env, found, errors); } break; case OBJ_TYPE_DEF: { insert_local(env, obj->var_name, obj->var_expr); semantic_analysis(env, obj->var_expr, errors); } break; case OBJ_TYPE_SET: { semantic_analysis(env, obj->var_name, errors); semantic_analysis(env, obj->var_expr, errors); } break; case OBJ_TYPE_IF: { semantic_analysis(env, obj->condition, errors); semantic_analysis(env, obj->expr_true, errors); semantic_analysis(env, obj->expr_false, errors); } break; case OBJ_TYPE_PAIR: { Object *head = obj->head; if (IS_SYMBOL(head)) { head = symbol_in_env(env, head); if (head == NULL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_SYMBOL_NOT_FOUND, .line = obj->head->line, .col = obj->head->col, }); return; } } if (IS_LAMBDA(head)) { if (obj->n_elems != array_size(head->params)) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = obj->line, .col = obj->col }); return; } } semantic_analysis(env, obj->head, errors); semantic_analysis(env, obj->tail, errors); } break; case OBJ_TYPE_LAMBDA: { // Initialize scope for this lambda. Environment *new_env = env_alloc(env); obj->env = new_env; for (size_t i = 0; i < array_size(obj->params); i++) { insert_params(obj->env, obj->params[i]); } // Used for removing unnecessary statements. Object **new_body = NULL; array_init(new_body, 0); for (size_t i = 0; i < array_size(obj->body); i++) { Object *expr = obj->body[i]; if (i != array_size(obj->body) - 1) { if (IS_FIXNUM(expr) || IS_STRING(expr) || IS_SYMBOL(expr) || IS_LAMBDA(expr) || IS_BOOL(expr) || IS_NIL(expr)) { continue; } } semantic_analysis(obj->env, expr, errors); array_push(new_body, expr); } array_free(obj->body); obj->body = new_body; } break; default: break; } } Program parse(Token *tokens, Errors *errors) { array_init(roots, 0); array_init(objects, 0); array_init(environments, 0); Parser parser = { .tokens = tokens, .current = 0, }; // Build initial ASTs. This also ensures the core grammar is correct. while (has_next_token(&parser)) { Object *root = parse_tree(&parser, errors); if (errors->n != 0) { return (Program){0}; } array_push(roots, root); } // Prepare global environment. Environment *env = env_alloc(NULL); // Perform semantic analysis: // 1. Populate symbol tables and ensure symbols are in scope when used. // 2. Removing unnecessary expressions. // 3. Verify number of arguments is correct in function calls. Root *final_roots = NULL; array_init(final_roots, 0); for (size_t i = 0; i < array_size(roots); i++) { Object *root = roots[i]; if (i != array_size(roots) - 1) { if (IS_FIXNUM(root) || IS_STRING(root) || IS_SYMBOL(root) || IS_LAMBDA(root) || IS_BOOL(root) || IS_NIL(root)) { continue; } } array_push(final_roots, root); semantic_analysis(env, root, errors); if (errors->n != 0) { array_free(final_roots); return (Program){0}; } } array_free(roots); roots = final_roots; // TODO: Check if primitive procedures have been given the right number of // arguments. // TODO: Type check basic expressions (e.g. arithmetic/numeric comparisons). // We can't be sure when we have functions unless the return type is known. return (Program){roots, env}; } Environment * env_alloc(Environment *parent) { Environment *env = malloc(sizeof(Environment)); env->locals = NULL; env->local_values = NULL; env->params = NULL; array_init(env->locals, 0); array_init(env->local_values, 0); array_init(env->params, 0); array_init(env->captured, 0); env->parent = parent; array_push(environments, env); return env; } Object * object_alloc(Token tok, ObjectType type) { Object *node = malloc(sizeof(Object)); node->line = tok.line; node->col = tok.col; node->type = type; node->visited = false; array_push(objects, node); return node; } void object_free(Object *node) { if (node == NULL) { return; } if (IS_LAMBDA(node)) { array_free(node->params); array_free(node->body); } free(node); } void free_objects(void) { if (objects != NULL) { for (size_t i = 0; i < array_size(objects); i++) { object_free(objects[i]); } array_free(objects); } if (environments != NULL) { for (size_t i = 0; i < array_size(environments); i++) { Environment *env = environments[i]; array_free(env->locals); array_free(env->local_values); array_free(env->params); array_free(env->captured); free(env); } array_free(environments); } array_free(roots); } void display_pair(Object *obj) { object_display(obj->head); if (obj->tail == NULL) { return; } if (IS_PAIR(obj->tail)) { printf(" "); display_pair(obj->tail); } else { printf(" . "); object_display(obj->tail); } } void object_display(Object *obj) { if (obj == NULL) { printf("#{error}"); return; } switch (obj->type) { case OBJ_TYPE_FIXNUM: { printf("%zd", obj->fixnum); } break; case OBJ_TYPE_TRUE: { printf("true"); } break; case OBJ_TYPE_FALSE: { printf("false"); } break; case OBJ_TYPE_NIL: { printf("()"); } break; case OBJ_TYPE_STRING: { printf("\"%.*s\"", (int)obj->text.n, obj->text.start); } break; case OBJ_TYPE_SYMBOL: { printf(":%.*s", (int)obj->text.n, obj->text.start); } break; case OBJ_TYPE_PAIR: { printf("("); display_pair(obj); printf(")"); } break; case OBJ_TYPE_LAMBDA: { printf("#{ lambda ( "); for (size_t i = 0; i < array_size(obj->params); i++) { object_display(obj->params[i]); printf(" "); } printf(") "); for (size_t i = 0; i < array_size(obj->body); i++) { object_display(obj->body[i]); printf(" "); } printf("}"); } break; case OBJ_TYPE_IF: { printf("#{ if "); object_display(obj->condition); printf(" "); object_display(obj->expr_true); if (obj->expr_false != NULL) { printf(" "); object_display(obj->expr_false); } printf(" }"); } break; case OBJ_TYPE_DEF: { printf("#{ def "); object_display(obj->var_name); printf(" "); object_display(obj->var_expr); printf(" }"); } break; case OBJ_TYPE_SET: { printf("#{ set! "); object_display(obj->var_name); printf(" "); object_display(obj->var_expr); printf(" }"); } break; case OBJ_TYPE_BUILTIN: { printf("%.*s", (int)obj->builtin_text.n, obj->builtin_text.start); } break; } return; } bool object_equal(Object *a, Object *b) { if (a == NULL || b == NULL || a->type != b->type) { return false; } switch (a->type) { case OBJ_TYPE_TRUE: case OBJ_TYPE_FALSE: { return true; } break; case OBJ_TYPE_FIXNUM: { return a->fixnum == b->fixnum; } break; case OBJ_TYPE_SYMBOL: case OBJ_TYPE_STRING: { return sv_equal(&a->text, &b->text); } break; case OBJ_TYPE_BUILTIN: { return a->builtin == b->builtin; } break; case OBJ_TYPE_PAIR: { Object *a_head = a->head; Object *b_head = b->head; if (!object_equal(a_head, b_head)) { return false; } Object *a_tail = a->tail; Object *b_tail = b->tail; while (a_tail != NULL && b_tail != NULL) { if (!object_equal(a_head, b_head)) { return false; } a_head = a_tail->head; b_head = b_tail->head; a_tail = a_tail->tail; b_tail = b_tail->tail; } if (a_tail == b_tail && object_equal(a_head, b_head)) { return true; } return false; } break; case OBJ_TYPE_LAMBDA: { size_t n_params_a = array_size(a->params); size_t n_params_b = array_size(b->params); size_t n_expr_a = array_size(a->body); size_t n_expr_b = array_size(b->body); if (n_params_a != n_params_b || n_expr_a != n_expr_b) { return false; } for (size_t i = 0; i < n_params_a; ++i) { if (!object_equal(a->params[i], b->params[i])) { return false; } } for (size_t i = 0; i < n_expr_a; ++i) { if (!object_equal(a->body[i], b->body[i])) { return false; } } return true; } break; default: break; } return false; }