From 5ed73b695e6b463149ab0c9ae3eccb26a4ec5807 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 29 Oct 2021 19:11:40 +0200 Subject: Add parser for tokens->ast conversion --- src/parser.c | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 src/parser.c (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..2d70c9d --- /dev/null +++ b/src/parser.c @@ -0,0 +1,257 @@ +#include "parser.h" +#include "darray.h" + +Token +peek_token(const Parser *parser) { + return parser->tokens[parser->current]; +} + +Token +next_token(Parser *parser) { + return parser->tokens[parser->current++]; +} + +Token +previous_token(Parser *parser) { + return parser->tokens[parser->current - 1]; +} + +Token +rewind_token(Parser *parser) { + return parser->tokens[--parser->current]; +} + +bool +has_next_token(const Parser *parser) { + return parser->current < array_size(parser->tokens) + && peek_token(parser).type != TOKEN_EOF; +} + +Object * +parse_fixnum(Token tok) { + ssize_t num = 0; + ssize_t sign = 1; + for (size_t i = 0; i < tok.value.n; i++) { + char c = tok.value.start[i]; + if (c == '-') { + sign = -1; + continue; + } + num = num * 10 + (c - '0'); + } + Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); + ret->fixnum = num * sign; + return ret; +} + +Object * +parse_bool(Token tok) { + ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE; + Object *ret = object_alloc(tok, type); + return ret; +} + +Object * +parse_string(Token tok) { + Object *ret = object_alloc(tok, OBJ_TYPE_STRING); + array_init(ret->text, tok.value.n); + array_insert(ret->text, tok.value.start, tok.value.n); + return ret; +} + +Object * +parse_symbol(Token tok) { + Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); + array_init(ret->text, tok.value.n); + array_insert(ret->text, tok.value.start, tok.value.n); + return ret; +} + +Object * +parse_list(Parser *parser, Errors *errors) { + if (errors->n != 0) { + return NULL; + } + Token start = previous_token(parser); + Object *root = object_alloc(start, OBJ_TYPE_PAIR); + root->car = NULL; + root->cdr = NULL; + Object *current = root; + while (has_next_token(parser)) { + current->car = parse_tree(parser, errors); + if (errors->n != 0 || current->car == NULL) { + object_free(root); + return NULL; + } + + Token tok = peek_token(parser); + if (tok.type == TOKEN_RPAREN) { + next_token(parser); + return root; + } + + Object *next = object_alloc(start, OBJ_TYPE_PAIR); + next->car = NULL; + next->cdr = NULL; + current->cdr = next; + current = current->cdr; + } + object_free(root); + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_UNBALANCED_PAREN, + .line = start.line, + .col = start.col, + }); + return NULL; +} + +Object * +parse_tree(Parser *parser, Errors *errors) { + Token tok = next_token(parser); + if (errors->n != 0) { + return NULL; + } + switch (tok.type) { + case TOKEN_FIXNUM: { + return parse_fixnum(tok); + } break; + case TOKEN_TRUE: + case TOKEN_FALSE: { + return parse_bool(tok); + } break; + case TOKEN_RPAREN: { + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_UNBALANCED_PAREN, + .line = tok.line, + .col = tok.col, + }); + return NULL; + } break; + case TOKEN_LPAREN: { + return parse_list(parser, errors); + } break; + case TOKEN_STRING: { + return parse_string(tok); + } break; + case TOKEN_SYMBOL: { + return parse_symbol(tok); + } break; + case TOKEN_NIL: { + return object_alloc(tok, OBJ_TYPE_NIL); + } break; + default: { + break; + } break; + } + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_EOF_REACHED, + .line = tok.line, + .col = tok.col, + }); + return NULL; +} + +Root * +parse(Token *tokens, Errors *errors) { + Root *roots = NULL; + array_init(roots, 0); + Parser parser = { + .tokens = tokens, + .current = 0, + }; + while (has_next_token(&parser)) { + Object *root = parse_tree(&parser, errors); + OBJ_PRINT(root); + if (errors->n != 0) { + break; + } + array_push(roots, root); + } + return roots; +} + +Object * +object_alloc(Token tok, ObjectType type) { + Object *node = malloc(sizeof(Object)); + node->line = tok.line; + node->col = tok.col; + node->type = type; + return node; +} + +void +object_free(Object *node) { + if (node == NULL) { + return; + } + if (IS_STRING(node) || IS_SYMBOL(node)) { + array_free(node->text); + } + if (IS_PAIR(node)) { + object_free(node->car); + object_free(node->cdr); + } + free(node); +} + +void +free_roots(Root *roots) { + if (roots != NULL) { + for (size_t i = 0; i < array_size(roots); i++) { + object_free(roots[i]); + } + array_free(roots); + } +} + +void +display_pair(Object *obj) { + object_display(obj->car); + if (obj->cdr == NULL) { + return; + } + if (IS_PAIR(obj->cdr)) { + printf(" "); + display_pair(obj->cdr); + } else { + printf(" . "); + object_display(obj->cdr); + } +} + +void +object_display(Object *obj) { + if (obj == NULL) { + printf("#{error}"); + return; + } + switch (obj->type) { + case OBJ_TYPE_FIXNUM: { + printf("%zd", obj->fixnum); + } break; + case OBJ_TYPE_TRUE: { + printf("true"); + } break; + case OBJ_TYPE_FALSE: { + printf("false"); + } break; + case OBJ_TYPE_NIL: { + printf("()"); + } break; + case OBJ_TYPE_STRING: { + printf("\"%.*s\"", (int)array_size(obj->text), obj->text); + } break; + case OBJ_TYPE_SYMBOL: { + printf(":%.*s", (int)array_size(obj->text), obj->text); + } break; + case OBJ_TYPE_PAIR: { + printf("("); + display_pair(obj); + printf(")"); + } break; + } + return; +} -- cgit v1.2.1