#include "parser.h" #include "darray.h" static Object **objects = NULL; static Root *roots = NULL; static Environment **environments = NULL; static char *builtins [] = { "+", "-", "*", "/", "%", "=", "<", ">", "<=", ">=", "not", "and", "or", "nil?", "zero?", "fixnum?", "bool?", "display", "cons", "car", "cdr", }; uint64_t symbol_hash(const HashTable *table, void *key) { Object *obj = key; uint64_t hash = _xor_shift_hash(obj->text.start, obj->text.n); hash = _fibonacci_hash(hash, table->shift_amount); return hash; } Token peek_token(const Parser *parser) { if (parser->current >= array_size(parser->tokens)) { return parser->tokens[array_size(parser->tokens) - 1]; } return parser->tokens[parser->current]; } Token next_token(Parser *parser) { if (parser->current >= array_size(parser->tokens)) { return parser->tokens[array_size(parser->tokens) - 1]; } return parser->tokens[parser->current++]; } Token previous_token(Parser *parser) { return parser->tokens[parser->current - 1]; } Token rewind_token(Parser *parser) { return parser->tokens[--parser->current]; } bool has_next_token(const Parser *parser) { return parser->current < array_size(parser->tokens) && peek_token(parser).type != TOKEN_EOF; } Object * parse_fixnum(Token tok) { ssize_t num = 0; ssize_t sign = 1; for (size_t i = 0; i < tok.value.n; i++) { char c = tok.value.start[i]; if (c == '-') { sign = -1; continue; } num = num * 10 + (c - '0'); } Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); ret->fixnum = num * sign; return ret; } Object * parse_bool(Token tok) { ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE; Object *ret = object_alloc(tok, type); return ret; } Object * parse_string(Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_STRING); ret->text = tok.value; return ret; } Object * parse_symbol(Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); ret->text = tok.value; return ret; } Object * parse_lambda(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); array_init(lambda->params, 0); array_init(lambda->body, 0); lambda->env = NULL; // Parse parameters. Token tok = next_token(parser); if (tok.type == TOKEN_LPAREN) { while (has_next_token(parser)) { Token tok = next_token(parser); if (tok.type == TOKEN_RPAREN) { break; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); } Object *symbol = parse_symbol(tok); array_push(lambda->params, symbol); } } else if (tok.type != TOKEN_NIL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } // Parse body. bool done = false; while (has_next_token(parser)) { Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); done = true; break; } Object *expr = parse_tree(parser, errors); array_push(lambda->body, expr); } if (!done) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (array_size(lambda->body) == 0) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.col, }); return NULL; } return lambda; } Object * parse_if(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *ret = object_alloc(start, OBJ_TYPE_IF); Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->condition = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->expr_true = parse_tree(parser, errors); // Optional else expression. tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); ret->expr_false = NULL; return ret; } ret->expr_false = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_EOF) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_TOO_MANY_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } next_token(parser); return ret; } Object * parse_var(Parser *parser, Errors *errors) { Token start = next_token(parser); ObjectType type = start.type == TOKEN_DEF ? OBJ_TYPE_DEF : OBJ_TYPE_SET; Object *ret = object_alloc(start, type); // Variable name. Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_name = parse_tree(parser, errors); // Variable value (expression). tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_expr = parse_tree(parser, errors); tok = peek_token(parser); if (tok.type == TOKEN_EOF) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_TOO_MANY_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } next_token(parser); return ret; } Object * parse_fun(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *ret = object_alloc(start, OBJ_TYPE_DEF); // Variable name. Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); return NULL; } ret->var_name = parse_tree(parser, errors); // Variable value (expression). rewind_token(parser); ret->var_expr = parse_lambda(parser, errors); return ret; } Object * parse_list(Parser *parser, Errors *errors) { if (errors->n != 0) { return NULL; } Token tok = peek_token(parser); switch (tok.type) { case TOKEN_RPAREN: { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } break; case TOKEN_LAMBDA: { return parse_lambda(parser, errors); } break; case TOKEN_IF: { return parse_if(parser, errors); } break; case TOKEN_DEF: { return parse_var(parser, errors); } break; case TOKEN_SET: { return parse_var(parser, errors); } break; case TOKEN_FUN: { return parse_fun(parser, errors); } break; default: break; } Token start = previous_token(parser); Object *root = object_alloc(start, OBJ_TYPE_PAIR); root->head = NULL; root->tail = NULL; root->n_elems = 0; Object *current = root; bool first = true; while (has_next_token(parser)) { Token tok = peek_token(parser); current->head = parse_tree(parser, errors); if (errors->n != 0 || current->head == NULL) { return NULL; } if (first) { if (!IS_SYMBOL(current->head) && !IS_LAMBDA(current->head)) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_CALLABLE, .line = tok.line, .col = tok.col, }); return NULL; } first = false; } tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); return root; } Object *next = object_alloc(start, OBJ_TYPE_PAIR); next->head = NULL; next->tail = NULL; current->tail = next; current = current->tail; root->n_elems++; } error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.col, }); return NULL; } Object * parse_tree(Parser *parser, Errors *errors) { Token tok = next_token(parser); if (errors->n != 0) { return NULL; } switch (tok.type) { case TOKEN_FIXNUM: { return parse_fixnum(tok); } break; case TOKEN_TRUE: case TOKEN_FALSE: { return parse_bool(tok); } break; case TOKEN_RPAREN: { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } break; case TOKEN_LPAREN: { return parse_list(parser, errors); } break; case TOKEN_STRING: { return parse_string(tok); } break; case TOKEN_SYMBOL: { return parse_symbol(tok); } break; case TOKEN_NIL: { return object_alloc(tok, OBJ_TYPE_NIL); } break; default: { break; } break; } return NULL; } Object * symbol_in_env(Environment *env, Object *symbol) { while (env != NULL) { Object *found = ht_lookup(env->table, symbol); if (found != NULL) { return found; } env = env->parent; } return NULL; } void semantic_analysis(Environment *env, Object *obj, Errors *errors) { if (obj == NULL || obj->visited) { return; } obj->visited = true; switch (obj->type) { case OBJ_TYPE_SYMBOL: { Object *found = symbol_in_env(env, obj); if (symbol_in_env(env, obj) == NULL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_SYMBOL_NOT_FOUND, .line = obj->line, .col = obj->col, }); return; } semantic_analysis(env, found, errors); } break; case OBJ_TYPE_DEF: { ht_insert(env->table, obj->var_name, obj->var_expr); semantic_analysis(env, obj->var_expr, errors); } break; case OBJ_TYPE_SET: { semantic_analysis(env, obj->var_name, errors); semantic_analysis(env, obj->var_expr, errors); } break; case OBJ_TYPE_IF: { semantic_analysis(env, obj->condition, errors); semantic_analysis(env, obj->expr_true, errors); semantic_analysis(env, obj->expr_false, errors); } break; case OBJ_TYPE_PAIR: { Object *head = obj->head; if (IS_SYMBOL(head)) { head = symbol_in_env(env, head); if (head == NULL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_SYMBOL_NOT_FOUND, .line = obj->head->line, .col = obj->head->col, }); return; } } if (IS_LAMBDA(head)) { if (obj->n_elems != array_size(head->params)) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_NOT_ENOUGH_ARGS, .line = obj->line, .col = obj->col }); return; } } semantic_analysis(env, obj->head, errors); semantic_analysis(env, obj->tail, errors); } break; case OBJ_TYPE_LAMBDA: { // Initialize scope for this lambda. Environment *new_env = env_alloc(env); obj->env = new_env; // Used for removing unnecessary statements. Object **new_body = NULL; array_init(new_body, 0); for (size_t i = 0; i < array_size(obj->body); i++) { Object *expr = obj->body[i]; if (i != array_size(obj->body) - 1) { if (IS_FIXNUM(expr) || IS_STRING(expr) || IS_SYMBOL(expr) || IS_LAMBDA(expr) || IS_BOOL(expr) || IS_NIL(expr)) { continue; } } semantic_analysis(obj->env, expr, errors); array_push(new_body, expr); } array_free(obj->body); obj->body = new_body; } break; default: break; } } Root * parse(Token *tokens, Errors *errors) { array_init(roots, 0); array_init(objects, 0); array_init(environments, 0); Parser parser = { .tokens = tokens, .current = 0, }; // Build initial ASTs. This also ensures the core grammar is correct. while (has_next_token(&parser)) { Object *root = parse_tree(&parser, errors); if (errors->n != 0) { return NULL; } array_push(roots, root); } // Prepare global environment of builtin functions. Environment *global_env = env_alloc(NULL); size_t n_builtins = sizeof(builtins) / sizeof(char*); for (size_t i = 0; i < n_builtins; i++) { // Prepare builtin symbol. char *str = builtins[i]; size_t str_n = strlen(str); Object *symbol = object_alloc((Token){0}, OBJ_TYPE_SYMBOL); symbol->text = (StringView){str, str_n}; // Insert in global table. ht_insert(global_env->table, symbol, symbol); } // Perform semantic analysis: // 1. Populate symbol tables and ensure symbols are in scope when used. // 2. Removing unnecessary expressions. // 3. Verify number of arguments is correct in function calls. Root *final_roots = NULL; array_init(final_roots, 0); for (size_t i = 0; i < array_size(roots); i++) { Object *root = roots[i]; if (i != array_size(roots) - 1) { if (IS_FIXNUM(root) || IS_STRING(root) || IS_SYMBOL(root) || IS_LAMBDA(root) || IS_BOOL(root) || IS_NIL(root)) { continue; } } array_push(final_roots, root); semantic_analysis(global_env, root, errors); if (errors->n != 0) { array_free(final_roots); return NULL; } } array_free(roots); roots = final_roots; // TODO: Check if primitive procedures have been given the right number of // arguments. // TODO: Type check basic expressions (e.g. arithmetic/numeric comparisons). // We can't be sure when we have functions unless the return type is known. return roots; } Environment * env_alloc(Environment *parent) { Environment *env = malloc(sizeof(Environment)); env->table = ht_init(symbol_hash, (EqFunc*)object_equal); env->parent = parent; array_push(environments, env); return env; } Object * object_alloc(Token tok, ObjectType type) { Object *node = malloc(sizeof(Object)); node->line = tok.line; node->col = tok.col; node->type = type; node->visited = false; array_push(objects, node); return node; } void object_free(Object *node) { if (node == NULL) { return; } if (IS_LAMBDA(node)) { array_free(node->params); array_free(node->body); } free(node); } void free_objects(void) { if (objects != NULL) { for (size_t i = 0; i < array_size(objects); i++) { object_free(objects[i]); } array_free(objects); } if (environments != NULL) { for (size_t i = 0; i < array_size(environments); i++) { Environment *env = environments[i]; ht_free(env->table); free(env); } array_free(environments); } array_free(roots); } void display_pair(Object *obj) { object_display(obj->head); if (obj->tail == NULL) { return; } if (IS_PAIR(obj->tail)) { printf(" "); display_pair(obj->tail); } else { printf(" . "); object_display(obj->tail); } } void object_display(Object *obj) { if (obj == NULL) { printf("#{error}"); return; } switch (obj->type) { case OBJ_TYPE_FIXNUM: { printf("%zd", obj->fixnum); } break; case OBJ_TYPE_TRUE: { printf("true"); } break; case OBJ_TYPE_FALSE: { printf("false"); } break; case OBJ_TYPE_NIL: { printf("()"); } break; case OBJ_TYPE_STRING: { printf("\"%.*s\"", (int)obj->text.n, obj->text.start); } break; case OBJ_TYPE_SYMBOL: { printf(":%.*s", (int)obj->text.n, obj->text.start); } break; case OBJ_TYPE_PAIR: { printf("("); display_pair(obj); printf(")"); } break; case OBJ_TYPE_LAMBDA: { printf("#{ lambda ( "); for (size_t i = 0; i < array_size(obj->params); i++) { object_display(obj->params[i]); printf(" "); } printf(") "); for (size_t i = 0; i < array_size(obj->body); i++) { object_display(obj->body[i]); printf(" "); } printf("}"); } break; case OBJ_TYPE_IF: { printf("#{ if "); object_display(obj->condition); printf(" "); object_display(obj->expr_true); if (obj->expr_false != NULL) { printf(" "); object_display(obj->expr_false); } printf(" }"); } break; case OBJ_TYPE_DEF: { printf("#{ def "); object_display(obj->var_name); printf(" "); object_display(obj->var_expr); printf(" }"); } break; case OBJ_TYPE_SET: { printf("#{ set! "); object_display(obj->var_name); printf(" "); object_display(obj->var_expr); printf(" }"); } break; } return; } bool object_equal(Object *a, Object *b) { if (a == NULL || b == NULL || a->type != b->type) { return false; } switch (a->type) { case OBJ_TYPE_TRUE: case OBJ_TYPE_FALSE: { return true; } break; case OBJ_TYPE_FIXNUM: { return a->fixnum == b->fixnum; } break; case OBJ_TYPE_SYMBOL: case OBJ_TYPE_STRING: { return sv_equal(&a->text, &b->text); } break; default: break; } return false; }