#include "parser.h" #include "darray.h" static Object **objects = NULL; static Root *roots = NULL; Token peek_token(const Parser *parser) { return parser->tokens[parser->current]; } Token next_token(Parser *parser) { return parser->tokens[parser->current++]; } Token previous_token(Parser *parser) { return parser->tokens[parser->current - 1]; } Token rewind_token(Parser *parser) { return parser->tokens[--parser->current]; } bool has_next_token(const Parser *parser) { return parser->current < array_size(parser->tokens) && peek_token(parser).type != TOKEN_EOF; } Object * parse_fixnum(Token tok) { ssize_t num = 0; ssize_t sign = 1; for (size_t i = 0; i < tok.value.n; i++) { char c = tok.value.start[i]; if (c == '-') { sign = -1; continue; } num = num * 10 + (c - '0'); } Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); ret->fixnum = num * sign; return ret; } Object * parse_bool(Token tok) { ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE; Object *ret = object_alloc(tok, type); return ret; } Object * parse_string(Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_STRING); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); return ret; } Object * parse_symbol(Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); return ret; } Object * parse_lambda(Parser *parser, Errors *errors) { Token start = next_token(parser); Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); array_init(lambda->params, 0); array_init(lambda->body, 0); // Parse parameters. Token tok = next_token(parser); if (tok.type == TOKEN_LPAREN) { while (has_next_token(parser)) { Token tok = next_token(parser); if (tok.type == TOKEN_RPAREN) { break; } if (tok.type != TOKEN_SYMBOL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); } Object *symbol = parse_symbol(tok); array_push(lambda->params, symbol); } } else if (tok.type != TOKEN_NIL) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.col, }); } // Parse body. while (has_next_token(parser)) { Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); break; } Object *expr = parse_tree(parser, errors); array_push(lambda->body, expr); } return lambda; } Object * parse_list(Parser *parser, Errors *errors) { if (errors->n != 0) { return NULL; } Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } if (tok.type == TOKEN_LAMBDA) { return parse_lambda(parser, errors); } Token start = previous_token(parser); Object *root = object_alloc(start, OBJ_TYPE_PAIR); root->head = NULL; root->tail = NULL; Object *current = root; while (has_next_token(parser)) { current->head = parse_tree(parser, errors); if (errors->n != 0 || current->head == NULL) { return NULL; } Token tok = peek_token(parser); if (tok.type == TOKEN_RPAREN) { next_token(parser); return root; } Object *next = object_alloc(start, OBJ_TYPE_PAIR); next->head = NULL; next->tail = NULL; current->tail = next; current = current->tail; } error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.col, }); return NULL; } Object * parse_tree(Parser *parser, Errors *errors) { Token tok = next_token(parser); if (errors->n != 0) { return NULL; } switch (tok.type) { case TOKEN_FIXNUM: { return parse_fixnum(tok); } break; case TOKEN_TRUE: case TOKEN_FALSE: { return parse_bool(tok); } break; case TOKEN_RPAREN: { error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.col, }); return NULL; } break; case TOKEN_LPAREN: { return parse_list(parser, errors); } break; case TOKEN_STRING: { return parse_string(tok); } break; case TOKEN_SYMBOL: { return parse_symbol(tok); } break; case TOKEN_NIL: { return object_alloc(tok, OBJ_TYPE_NIL); } break; default: { break; } break; } error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_EOF_REACHED, .line = tok.line, .col = tok.col, }); return NULL; } Root * parse(Token *tokens, Errors *errors) { // Build initial AST. array_init(roots, 0); Parser parser = { .tokens = tokens, .current = 0, }; while (has_next_token(&parser)) { Object *root = parse_tree(&parser, errors); OBJ_PRINT(root); if (errors->n != 0) { break; } array_push(roots, root); } // Perform semantic analysis. // 1. Ensure core grammar is correct. // 2. Check that symbols are defined before usage. // 3. Remove unnecessary statements. return roots; } Object * object_alloc(Token tok, ObjectType type) { if (objects == NULL) { array_init(objects, 0); } Object *node = malloc(sizeof(Object)); node->line = tok.line; node->col = tok.col; node->type = type; array_push(objects, node); return node; } void object_free(Object *node) { if (node == NULL) { return; } if (IS_STRING(node) || IS_SYMBOL(node)) { array_free(node->text); } if (IS_LAMBDA(node)) { array_free(node->params); array_free(node->body); } free(node); } void free_objects(void) { if (objects != NULL) { for (size_t i = 0; i < array_size(objects); i++) { object_free(objects[i]); } array_free(objects); } array_free(roots); } void display_pair(Object *obj) { object_display(obj->head); if (obj->tail == NULL) { return; } if (IS_PAIR(obj->tail)) { printf(" "); display_pair(obj->tail); } else { printf(" . "); object_display(obj->tail); } } void object_display(Object *obj) { if (obj == NULL) { printf("#{error}"); return; } switch (obj->type) { case OBJ_TYPE_FIXNUM: { printf("%zd", obj->fixnum); } break; case OBJ_TYPE_TRUE: { printf("true"); } break; case OBJ_TYPE_FALSE: { printf("false"); } break; case OBJ_TYPE_NIL: { printf("()"); } break; case OBJ_TYPE_STRING: { printf("\"%.*s\"", (int)array_size(obj->text), obj->text); } break; case OBJ_TYPE_SYMBOL: { printf(":%.*s", (int)array_size(obj->text), obj->text); } break; case OBJ_TYPE_PAIR: { printf("("); display_pair(obj); printf(")"); } break; case OBJ_TYPE_LAMBDA: { printf("#{lambda( "); for (size_t i = 0; i < array_size(obj->params); i++) { object_display(obj->params[i]); printf(" "); } printf(")}"); } break; } return; }