From 95709acb7f166b21f562ef3fcf8ba7cb5890d28a Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 29 Oct 2021 20:12:19 +0200 Subject: Deduplicate string/symbols text for fast equality checks --- src/main.c | 6 ++-- src/parser.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- src/parser.h | 24 +++++++++++--- 3 files changed, 112 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/main.c b/src/main.c index c734916..d4fa2c2 100755 --- a/src/main.c +++ b/src/main.c @@ -32,10 +32,10 @@ process_source(const StringView *source, const char *file_name) { } // Parser. - Root *roots = parse(tokens, &errors); + ParserResults pr = parse(tokens, &errors); if (errors.n != 0) { report_errors(&errors, file_name); - free_roots(roots); + free_parser_results(&pr); array_free(tokens); exit(EXIT_FAILURE); } @@ -46,7 +46,7 @@ process_source(const StringView *source, const char *file_name) { // TODO: Compilation. // Free resources. - free_roots(roots); + free_parser_results(&pr); } void diff --git a/src/parser.c b/src/parser.c index 2d70c9d..3c3b1af 100644 --- a/src/parser.c +++ b/src/parser.c @@ -52,18 +52,48 @@ parse_bool(Token tok) { } Object * -parse_string(Token tok) { +parse_string(Parser *parser, Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_STRING); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); + + // Check if text was already present. + ssize_t idx = -1; + for (size_t i = 0; i < array_size(parser->strings); i++) { + if (object_equal(ret, parser->strings[i])) { + idx = i; + break; + } + } + if (idx != -1) { + array_free(ret->text); + ret->text = parser->strings[idx]->text; + } else { + array_push(parser->strings, ret); + } return ret; } Object * -parse_symbol(Token tok) { +parse_symbol(Parser *parser, Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); + + // Check if text was already present. + ssize_t idx = -1; + for (size_t i = 0; i < array_size(parser->symbols); i++) { + if (object_equal(ret, parser->symbols[i])) { + idx = i; + break; + } + } + if (idx != -1) { + array_free(ret->text); + ret->text = parser->symbols[idx]->text; + } else { + array_push(parser->symbols, ret); + } return ret; } @@ -133,10 +163,10 @@ parse_tree(Parser *parser, Errors *errors) { return parse_list(parser, errors); } break; case TOKEN_STRING: { - return parse_string(tok); + return parse_string(parser, tok); } break; case TOKEN_SYMBOL: { - return parse_symbol(tok); + return parse_symbol(parser, tok); } break; case TOKEN_NIL: { return object_alloc(tok, OBJ_TYPE_NIL); @@ -154,7 +184,7 @@ parse_tree(Parser *parser, Errors *errors) { return NULL; } -Root * +ParserResults parse(Token *tokens, Errors *errors) { Root *roots = NULL; array_init(roots, 0); @@ -162,6 +192,8 @@ parse(Token *tokens, Errors *errors) { .tokens = tokens, .current = 0, }; + array_init(parser.symbols, 0); + array_init(parser.strings, 0); while (has_next_token(&parser)) { Object *root = parse_tree(&parser, errors); OBJ_PRINT(root); @@ -170,7 +202,11 @@ parse(Token *tokens, Errors *errors) { } array_push(roots, root); } - return roots; + return (ParserResults){ + .roots = roots, + .symbols = parser.symbols, + .strings = parser.strings, + }; } Object * @@ -187,9 +223,6 @@ object_free(Object *node) { if (node == NULL) { return; } - if (IS_STRING(node) || IS_SYMBOL(node)) { - array_free(node->text); - } if (IS_PAIR(node)) { object_free(node->car); object_free(node->cdr); @@ -198,12 +231,24 @@ object_free(Object *node) { } void -free_roots(Root *roots) { - if (roots != NULL) { - for (size_t i = 0; i < array_size(roots); i++) { - object_free(roots[i]); +free_parser_results(ParserResults *pr) { + if (pr->symbols != NULL) { + for (size_t i = 0; i < array_size(pr->symbols); i++) { + array_free(pr->symbols[i]->text); } - array_free(roots); + array_free(pr->symbols); + } + if (pr->strings != NULL) { + for (size_t i = 0; i < array_size(pr->strings); i++) { + array_free(pr->strings[i]->text); + } + array_free(pr->strings); + } + if (pr->roots != NULL) { + for (size_t i = 0; i < array_size(pr->roots); i++) { + object_free(pr->roots[i]); + } + array_free(pr->roots); } } @@ -255,3 +300,33 @@ object_display(Object *obj) { } return; } + +bool +object_equal(Object *a, Object *b) { + if (a->type != b->type) { + return false; + } + switch (a->type) { + case OBJ_TYPE_TRUE: + case OBJ_TYPE_FALSE: { + return true; + } break; + case OBJ_TYPE_FIXNUM: { + return a->fixnum == b->fixnum; + } break; + case OBJ_TYPE_SYMBOL: + case OBJ_TYPE_STRING: { + if (array_size(a->text) != array_size(b->text)) { + return false; + } + for (size_t i = 0; i < array_size(a->text); i++) { + if (a->text[i] != b->text[i]) { + return false; + } + } + return true; + } break; + default: break; + } + return a == b; +} diff --git a/src/parser.h b/src/parser.h index ee5febe..f438863 100755 --- a/src/parser.h +++ b/src/parser.h @@ -37,10 +37,25 @@ typedef struct Object { typedef struct Parser { Token *tokens; size_t current; + + // Unique symbols and strings. + Object **symbols; + Object **strings; } Parser; typedef Object* Root; +typedef struct ParserResults { + // All root statements. + Root *roots; + + // Unique symbols (tracking only first occurence). + Object **symbols; + + // Unique strings (tracking only first occurence). + Object **strings; +} ParserResults; + // Mimics the functionality in the Scanner functions, but for tokens. Token next_token(Parser *parser); Token previous_token(Parser *parser); @@ -50,20 +65,21 @@ bool has_next_token(const Parser *parser); // Parsing operations. Object * parse_tree(Parser *parser, Errors *errors); -Object * parse_symbol(Token tok); -Object * parse_string(Token tok); +Object * parse_symbol(Parser *parser, Token tok); +Object * parse_string(Parser *parser, Token tok); Object * parse_bool(Token tok); Object * parse_fixnum(Token tok); Object * parse_list(Parser *parser, Errors *errors); -Root * parse(Token *tokens, Errors *errors); +ParserResults parse(Token *tokens, Errors *errors); // Object operations. void object_display(Object *obj); +bool object_equal(Object *a, Object *b); // Manage resources. Object * object_alloc(Token tok, ObjectType type); void object_free(Object *node); -void free_roots(Root *roots); +void free_parser_results(ParserResults *pr); // // Helper macros. -- cgit v1.2.1