From 95709acb7f166b21f562ef3fcf8ba7cb5890d28a Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 29 Oct 2021 20:12:19 +0200 Subject: Deduplicate string/symbols text for fast equality checks --- src/parser.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 14 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index 2d70c9d..3c3b1af 100644 --- a/src/parser.c +++ b/src/parser.c @@ -52,18 +52,48 @@ parse_bool(Token tok) { } Object * -parse_string(Token tok) { +parse_string(Parser *parser, Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_STRING); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); + + // Check if text was already present. + ssize_t idx = -1; + for (size_t i = 0; i < array_size(parser->strings); i++) { + if (object_equal(ret, parser->strings[i])) { + idx = i; + break; + } + } + if (idx != -1) { + array_free(ret->text); + ret->text = parser->strings[idx]->text; + } else { + array_push(parser->strings, ret); + } return ret; } Object * -parse_symbol(Token tok) { +parse_symbol(Parser *parser, Token tok) { Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); array_init(ret->text, tok.value.n); array_insert(ret->text, tok.value.start, tok.value.n); + + // Check if text was already present. + ssize_t idx = -1; + for (size_t i = 0; i < array_size(parser->symbols); i++) { + if (object_equal(ret, parser->symbols[i])) { + idx = i; + break; + } + } + if (idx != -1) { + array_free(ret->text); + ret->text = parser->symbols[idx]->text; + } else { + array_push(parser->symbols, ret); + } return ret; } @@ -133,10 +163,10 @@ parse_tree(Parser *parser, Errors *errors) { return parse_list(parser, errors); } break; case TOKEN_STRING: { - return parse_string(tok); + return parse_string(parser, tok); } break; case TOKEN_SYMBOL: { - return parse_symbol(tok); + return parse_symbol(parser, tok); } break; case TOKEN_NIL: { return object_alloc(tok, OBJ_TYPE_NIL); @@ -154,7 +184,7 @@ parse_tree(Parser *parser, Errors *errors) { return NULL; } -Root * +ParserResults parse(Token *tokens, Errors *errors) { Root *roots = NULL; array_init(roots, 0); @@ -162,6 +192,8 @@ parse(Token *tokens, Errors *errors) { .tokens = tokens, .current = 0, }; + array_init(parser.symbols, 0); + array_init(parser.strings, 0); while (has_next_token(&parser)) { Object *root = parse_tree(&parser, errors); OBJ_PRINT(root); @@ -170,7 +202,11 @@ parse(Token *tokens, Errors *errors) { } array_push(roots, root); } - return roots; + return (ParserResults){ + .roots = roots, + .symbols = parser.symbols, + .strings = parser.strings, + }; } Object * @@ -187,9 +223,6 @@ object_free(Object *node) { if (node == NULL) { return; } - if (IS_STRING(node) || IS_SYMBOL(node)) { - array_free(node->text); - } if (IS_PAIR(node)) { object_free(node->car); object_free(node->cdr); @@ -198,12 +231,24 @@ object_free(Object *node) { } void -free_roots(Root *roots) { - if (roots != NULL) { - for (size_t i = 0; i < array_size(roots); i++) { - object_free(roots[i]); +free_parser_results(ParserResults *pr) { + if (pr->symbols != NULL) { + for (size_t i = 0; i < array_size(pr->symbols); i++) { + array_free(pr->symbols[i]->text); } - array_free(roots); + array_free(pr->symbols); + } + if (pr->strings != NULL) { + for (size_t i = 0; i < array_size(pr->strings); i++) { + array_free(pr->strings[i]->text); + } + array_free(pr->strings); + } + if (pr->roots != NULL) { + for (size_t i = 0; i < array_size(pr->roots); i++) { + object_free(pr->roots[i]); + } + array_free(pr->roots); } } @@ -255,3 +300,33 @@ object_display(Object *obj) { } return; } + +bool +object_equal(Object *a, Object *b) { + if (a->type != b->type) { + return false; + } + switch (a->type) { + case OBJ_TYPE_TRUE: + case OBJ_TYPE_FALSE: { + return true; + } break; + case OBJ_TYPE_FIXNUM: { + return a->fixnum == b->fixnum; + } break; + case OBJ_TYPE_SYMBOL: + case OBJ_TYPE_STRING: { + if (array_size(a->text) != array_size(b->text)) { + return false; + } + for (size_t i = 0; i < array_size(a->text); i++) { + if (a->text[i] != b->text[i]) { + return false; + } + } + return true; + } break; + default: break; + } + return a == b; +} -- cgit v1.2.1