From 4ebcd99d1fadac72ea58ea46012a86c5319ef7e7 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sat, 30 Oct 2021 08:16:08 +0200 Subject: Add parsing of lambda expression --- src/lexer.c | 2 + src/lexer.h | 3 ++ src/main.c | 4 +- src/parser.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++----------- src/parser.h | 15 +++++-- 5 files changed, 125 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/lexer.c b/src/lexer.c index ac93a5c..f272901 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -11,6 +11,7 @@ static const char* token_str[] = { [TOKEN_NIL] = "TOKEN_NIL", [TOKEN_TRUE] = "TOKEN_TRUE", [TOKEN_FALSE] = "TOKEN_FALSE", + [TOKEN_LAMBDA] = "TOKEN_LAMBDA", [TOKEN_EOF] = "TOKEN_EOF", }; @@ -124,6 +125,7 @@ find_primitive_type(const StringView value) { if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } + if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; } return TOKEN_SYMBOL; } diff --git a/src/lexer.h b/src/lexer.h index 43898bd..4fa4db5 100755 --- a/src/lexer.h +++ b/src/lexer.h @@ -18,6 +18,9 @@ typedef enum TokenType { TOKEN_TRUE, TOKEN_FALSE, + // Keywords. + TOKEN_LAMBDA, + // End of file. TOKEN_EOF, } TokenType; diff --git a/src/main.c b/src/main.c index c734916..6a683a7 100755 --- a/src/main.c +++ b/src/main.c @@ -35,7 +35,7 @@ process_source(const StringView *source, const char *file_name) { Root *roots = parse(tokens, &errors); if (errors.n != 0) { report_errors(&errors, file_name); - free_roots(roots); + free_objects(); array_free(tokens); exit(EXIT_FAILURE); } @@ -46,7 +46,7 @@ process_source(const StringView *source, const char *file_name) { // TODO: Compilation. // Free resources. - free_roots(roots); + free_objects(); } void diff --git a/src/parser.c b/src/parser.c index 2d70c9d..8a6c4ce 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,6 +1,9 @@ #include "parser.h" #include "darray.h" +static Object **objects = NULL; +static Root *roots = NULL; + Token peek_token(const Parser *parser) { return parser->tokens[parser->current]; @@ -67,20 +70,82 @@ parse_symbol(Token tok) { return ret; } +Object * +parse_lambda(Parser *parser, Errors *errors) { + Token start = next_token(parser); + Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); + array_init(lambda->params, 0); + array_init(lambda->body, 0); + + // Parse parameters. + Token tok = next_token(parser); + if (tok.type == TOKEN_LPAREN) { + while (has_next_token(parser)) { + Token tok = next_token(parser); + if (tok.type == TOKEN_RPAREN) { + break; + } + if (tok.type != TOKEN_SYMBOL) { + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_WRONG_ARG_TYPE, + .line = tok.line, + .col = tok.col, + }); + } + Object *symbol = parse_symbol(tok); + array_push(lambda->params, symbol); + } + } else if (tok.type != TOKEN_NIL) { + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_WRONG_ARG_TYPE, + .line = tok.line, + .col = tok.col, + }); + } + + // Parse body. + while (has_next_token(parser)) { + Token tok = peek_token(parser); + if (tok.type == TOKEN_RPAREN) { + next_token(parser); + break; + } + Object *expr = parse_tree(parser, errors); + array_push(lambda->body, expr); + } + return lambda; +} + Object * parse_list(Parser *parser, Errors *errors) { if (errors->n != 0) { return NULL; } + + Token tok = peek_token(parser); + if (tok.type == TOKEN_RPAREN) { + error_push(errors, (Error){ + .type = ERR_TYPE_PARSER, + .value = ERR_UNBALANCED_PAREN, + .line = tok.line, + .col = tok.col, + }); + return NULL; + } + if (tok.type == TOKEN_LAMBDA) { + return parse_lambda(parser, errors); + } + Token start = previous_token(parser); Object *root = object_alloc(start, OBJ_TYPE_PAIR); - root->car = NULL; - root->cdr = NULL; + root->head = NULL; + root->tail = NULL; Object *current = root; while (has_next_token(parser)) { - current->car = parse_tree(parser, errors); - if (errors->n != 0 || current->car == NULL) { - object_free(root); + current->head = parse_tree(parser, errors); + if (errors->n != 0 || current->head == NULL) { return NULL; } @@ -91,12 +156,11 @@ parse_list(Parser *parser, Errors *errors) { } Object *next = object_alloc(start, OBJ_TYPE_PAIR); - next->car = NULL; - next->cdr = NULL; - current->cdr = next; - current = current->cdr; + next->head = NULL; + next->tail = NULL; + current->tail = next; + current = current->tail; } - object_free(root); error_push(errors, (Error){ .type = ERR_TYPE_PARSER, .value = ERR_UNBALANCED_PAREN, @@ -156,7 +220,7 @@ parse_tree(Parser *parser, Errors *errors) { Root * parse(Token *tokens, Errors *errors) { - Root *roots = NULL; + // Build initial AST. array_init(roots, 0); Parser parser = { .tokens = tokens, @@ -170,15 +234,24 @@ parse(Token *tokens, Errors *errors) { } array_push(roots, root); } + + // Perform semantic analysis. + // 1. Ensure core grammar is correct. + // 2. Check that symbols are defined before usage. + // 3. Remove unnecessary statements. return roots; } Object * object_alloc(Token tok, ObjectType type) { + if (objects == NULL) { + array_init(objects, 0); + } Object *node = malloc(sizeof(Object)); node->line = tok.line; node->col = tok.col; node->type = type; + array_push(objects, node); return node; } @@ -190,35 +263,36 @@ object_free(Object *node) { if (IS_STRING(node) || IS_SYMBOL(node)) { array_free(node->text); } - if (IS_PAIR(node)) { - object_free(node->car); - object_free(node->cdr); + if (IS_LAMBDA(node)) { + array_free(node->params); + array_free(node->body); } free(node); } void -free_roots(Root *roots) { - if (roots != NULL) { - for (size_t i = 0; i < array_size(roots); i++) { - object_free(roots[i]); +free_objects(void) { + if (objects != NULL) { + for (size_t i = 0; i < array_size(objects); i++) { + object_free(objects[i]); } - array_free(roots); + array_free(objects); } + array_free(roots); } void display_pair(Object *obj) { - object_display(obj->car); - if (obj->cdr == NULL) { + object_display(obj->head); + if (obj->tail == NULL) { return; } - if (IS_PAIR(obj->cdr)) { + if (IS_PAIR(obj->tail)) { printf(" "); - display_pair(obj->cdr); + display_pair(obj->tail); } else { printf(" . "); - object_display(obj->cdr); + object_display(obj->tail); } } @@ -252,6 +326,14 @@ object_display(Object *obj) { display_pair(obj); printf(")"); } break; + case OBJ_TYPE_LAMBDA: { + printf("#{lambda( "); + for (size_t i = 0; i < array_size(obj->params); i++) { + object_display(obj->params[i]); + printf(" "); + } + printf(")}"); + } break; } return; } diff --git a/src/parser.h b/src/parser.h index ee5febe..17bd6d6 100755 --- a/src/parser.h +++ b/src/parser.h @@ -11,6 +11,7 @@ typedef enum ObjectType { OBJ_TYPE_SYMBOL, OBJ_TYPE_STRING, OBJ_TYPE_PAIR, + OBJ_TYPE_LAMBDA, } ObjectType; typedef struct Object { @@ -25,9 +26,15 @@ typedef struct Object { // OBJ_TYPE_PAIR struct { - struct Object *car; - struct Object *cdr; + struct Object *head; + struct Object *tail; }; + + // OBJ_TYPE_LAMBDA + struct { + struct Object **params; + struct Object **body; + }; }; size_t line; @@ -55,6 +62,7 @@ Object * parse_string(Token tok); Object * parse_bool(Token tok); Object * parse_fixnum(Token tok); Object * parse_list(Parser *parser, Errors *errors); +Object * parse_lambda(Parser *parser, Errors *errors); Root * parse(Token *tokens, Errors *errors); // Object operations. @@ -63,7 +71,7 @@ void object_display(Object *obj); // Manage resources. Object * object_alloc(Token tok, ObjectType type); void object_free(Object *node); -void free_roots(Root *roots); +void free_objects(void); // // Helper macros. @@ -77,6 +85,7 @@ void free_roots(Root *roots); #define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING) #define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL) #define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR) +#define IS_LAMBDA(VAL) ((VAL)->type == OBJ_TYPE_LAMBDA) // Debug. #define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n"); -- cgit v1.2.1