aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-10-29 20:12:19 +0200
committerBad Diode <bd@badd10de.dev>2021-10-29 20:12:19 +0200
commit95709acb7f166b21f562ef3fcf8ba7cb5890d28a (patch)
tree5a3629189218d2c123228831348eae694b2a3a8a
parent5ed73b695e6b463149ab0c9ae3eccb26a4ec5807 (diff)
downloadbdl-95709acb7f166b21f562ef3fcf8ba7cb5890d28a.tar.gz
bdl-95709acb7f166b21f562ef3fcf8ba7cb5890d28a.zip
Deduplicate string/symbols text for fast equality checks
-rwxr-xr-xsrc/main.c6
-rw-r--r--src/parser.c103
-rwxr-xr-xsrc/parser.h24
3 files changed, 112 insertions, 21 deletions
diff --git a/src/main.c b/src/main.c
index c734916..d4fa2c2 100755
--- a/src/main.c
+++ b/src/main.c
@@ -32,10 +32,10 @@ process_source(const StringView *source, const char *file_name) {
32 } 32 }
33 33
34 // Parser. 34 // Parser.
35 Root *roots = parse(tokens, &errors); 35 ParserResults pr = parse(tokens, &errors);
36 if (errors.n != 0) { 36 if (errors.n != 0) {
37 report_errors(&errors, file_name); 37 report_errors(&errors, file_name);
38 free_roots(roots); 38 free_parser_results(&pr);
39 array_free(tokens); 39 array_free(tokens);
40 exit(EXIT_FAILURE); 40 exit(EXIT_FAILURE);
41 } 41 }
@@ -46,7 +46,7 @@ process_source(const StringView *source, const char *file_name) {
46 // TODO: Compilation. 46 // TODO: Compilation.
47 47
48 // Free resources. 48 // Free resources.
49 free_roots(roots); 49 free_parser_results(&pr);
50} 50}
51 51
52void 52void
diff --git a/src/parser.c b/src/parser.c
index 2d70c9d..3c3b1af 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -52,18 +52,48 @@ parse_bool(Token tok) {
52} 52}
53 53
54Object * 54Object *
55parse_string(Token tok) { 55parse_string(Parser *parser, Token tok) {
56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING); 56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING);
57 array_init(ret->text, tok.value.n); 57 array_init(ret->text, tok.value.n);
58 array_insert(ret->text, tok.value.start, tok.value.n); 58 array_insert(ret->text, tok.value.start, tok.value.n);
59
60 // Check if text was already present.
61 ssize_t idx = -1;
62 for (size_t i = 0; i < array_size(parser->strings); i++) {
63 if (object_equal(ret, parser->strings[i])) {
64 idx = i;
65 break;
66 }
67 }
68 if (idx != -1) {
69 array_free(ret->text);
70 ret->text = parser->strings[idx]->text;
71 } else {
72 array_push(parser->strings, ret);
73 }
59 return ret; 74 return ret;
60} 75}
61 76
62Object * 77Object *
63parse_symbol(Token tok) { 78parse_symbol(Parser *parser, Token tok) {
64 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); 79 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL);
65 array_init(ret->text, tok.value.n); 80 array_init(ret->text, tok.value.n);
66 array_insert(ret->text, tok.value.start, tok.value.n); 81 array_insert(ret->text, tok.value.start, tok.value.n);
82
83 // Check if text was already present.
84 ssize_t idx = -1;
85 for (size_t i = 0; i < array_size(parser->symbols); i++) {
86 if (object_equal(ret, parser->symbols[i])) {
87 idx = i;
88 break;
89 }
90 }
91 if (idx != -1) {
92 array_free(ret->text);
93 ret->text = parser->symbols[idx]->text;
94 } else {
95 array_push(parser->symbols, ret);
96 }
67 return ret; 97 return ret;
68} 98}
69 99
@@ -133,10 +163,10 @@ parse_tree(Parser *parser, Errors *errors) {
133 return parse_list(parser, errors); 163 return parse_list(parser, errors);
134 } break; 164 } break;
135 case TOKEN_STRING: { 165 case TOKEN_STRING: {
136 return parse_string(tok); 166 return parse_string(parser, tok);
137 } break; 167 } break;
138 case TOKEN_SYMBOL: { 168 case TOKEN_SYMBOL: {
139 return parse_symbol(tok); 169 return parse_symbol(parser, tok);
140 } break; 170 } break;
141 case TOKEN_NIL: { 171 case TOKEN_NIL: {
142 return object_alloc(tok, OBJ_TYPE_NIL); 172 return object_alloc(tok, OBJ_TYPE_NIL);
@@ -154,7 +184,7 @@ parse_tree(Parser *parser, Errors *errors) {
154 return NULL; 184 return NULL;
155} 185}
156 186
157Root * 187ParserResults
158parse(Token *tokens, Errors *errors) { 188parse(Token *tokens, Errors *errors) {
159 Root *roots = NULL; 189 Root *roots = NULL;
160 array_init(roots, 0); 190 array_init(roots, 0);
@@ -162,6 +192,8 @@ parse(Token *tokens, Errors *errors) {
162 .tokens = tokens, 192 .tokens = tokens,
163 .current = 0, 193 .current = 0,
164 }; 194 };
195 array_init(parser.symbols, 0);
196 array_init(parser.strings, 0);
165 while (has_next_token(&parser)) { 197 while (has_next_token(&parser)) {
166 Object *root = parse_tree(&parser, errors); 198 Object *root = parse_tree(&parser, errors);
167 OBJ_PRINT(root); 199 OBJ_PRINT(root);
@@ -170,7 +202,11 @@ parse(Token *tokens, Errors *errors) {
170 } 202 }
171 array_push(roots, root); 203 array_push(roots, root);
172 } 204 }
173 return roots; 205 return (ParserResults){
206 .roots = roots,
207 .symbols = parser.symbols,
208 .strings = parser.strings,
209 };
174} 210}
175 211
176Object * 212Object *
@@ -187,9 +223,6 @@ object_free(Object *node) {
187 if (node == NULL) { 223 if (node == NULL) {
188 return; 224 return;
189 } 225 }
190 if (IS_STRING(node) || IS_SYMBOL(node)) {
191 array_free(node->text);
192 }
193 if (IS_PAIR(node)) { 226 if (IS_PAIR(node)) {
194 object_free(node->car); 227 object_free(node->car);
195 object_free(node->cdr); 228 object_free(node->cdr);
@@ -198,12 +231,24 @@ object_free(Object *node) {
198} 231}
199 232
200void 233void
201free_roots(Root *roots) { 234free_parser_results(ParserResults *pr) {
202 if (roots != NULL) { 235 if (pr->symbols != NULL) {
203 for (size_t i = 0; i < array_size(roots); i++) { 236 for (size_t i = 0; i < array_size(pr->symbols); i++) {
204 object_free(roots[i]); 237 array_free(pr->symbols[i]->text);
205 } 238 }
206 array_free(roots); 239 array_free(pr->symbols);
240 }
241 if (pr->strings != NULL) {
242 for (size_t i = 0; i < array_size(pr->strings); i++) {
243 array_free(pr->strings[i]->text);
244 }
245 array_free(pr->strings);
246 }
247 if (pr->roots != NULL) {
248 for (size_t i = 0; i < array_size(pr->roots); i++) {
249 object_free(pr->roots[i]);
250 }
251 array_free(pr->roots);
207 } 252 }
208} 253}
209 254
@@ -255,3 +300,33 @@ object_display(Object *obj) {
255 } 300 }
256 return; 301 return;
257} 302}
303
304bool
305object_equal(Object *a, Object *b) {
306 if (a->type != b->type) {
307 return false;
308 }
309 switch (a->type) {
310 case OBJ_TYPE_TRUE:
311 case OBJ_TYPE_FALSE: {
312 return true;
313 } break;
314 case OBJ_TYPE_FIXNUM: {
315 return a->fixnum == b->fixnum;
316 } break;
317 case OBJ_TYPE_SYMBOL:
318 case OBJ_TYPE_STRING: {
319 if (array_size(a->text) != array_size(b->text)) {
320 return false;
321 }
322 for (size_t i = 0; i < array_size(a->text); i++) {
323 if (a->text[i] != b->text[i]) {
324 return false;
325 }
326 }
327 return true;
328 } break;
329 default: break;
330 }
331 return a == b;
332}
diff --git a/src/parser.h b/src/parser.h
index ee5febe..f438863 100755
--- a/src/parser.h
+++ b/src/parser.h
@@ -37,10 +37,25 @@ typedef struct Object {
37typedef struct Parser { 37typedef struct Parser {
38 Token *tokens; 38 Token *tokens;
39 size_t current; 39 size_t current;
40
41 // Unique symbols and strings.
42 Object **symbols;
43 Object **strings;
40} Parser; 44} Parser;
41 45
42typedef Object* Root; 46typedef Object* Root;
43 47
48typedef struct ParserResults {
49 // All root statements.
50 Root *roots;
51
52 // Unique symbols (tracking only first occurence).
53 Object **symbols;
54
55 // Unique strings (tracking only first occurence).
56 Object **strings;
57} ParserResults;
58
44// Mimics the functionality in the Scanner functions, but for tokens. 59// Mimics the functionality in the Scanner functions, but for tokens.
45Token next_token(Parser *parser); 60Token next_token(Parser *parser);
46Token previous_token(Parser *parser); 61Token previous_token(Parser *parser);
@@ -50,20 +65,21 @@ bool has_next_token(const Parser *parser);
50 65
51// Parsing operations. 66// Parsing operations.
52Object * parse_tree(Parser *parser, Errors *errors); 67Object * parse_tree(Parser *parser, Errors *errors);
53Object * parse_symbol(Token tok); 68Object * parse_symbol(Parser *parser, Token tok);
54Object * parse_string(Token tok); 69Object * parse_string(Parser *parser, Token tok);
55Object * parse_bool(Token tok); 70Object * parse_bool(Token tok);
56Object * parse_fixnum(Token tok); 71Object * parse_fixnum(Token tok);
57Object * parse_list(Parser *parser, Errors *errors); 72Object * parse_list(Parser *parser, Errors *errors);
58Root * parse(Token *tokens, Errors *errors); 73ParserResults parse(Token *tokens, Errors *errors);
59 74
60// Object operations. 75// Object operations.
61void object_display(Object *obj); 76void object_display(Object *obj);
77bool object_equal(Object *a, Object *b);
62 78
63// Manage resources. 79// Manage resources.
64Object * object_alloc(Token tok, ObjectType type); 80Object * object_alloc(Token tok, ObjectType type);
65void object_free(Object *node); 81void object_free(Object *node);
66void free_roots(Root *roots); 82void free_parser_results(ParserResults *pr);
67 83
68// 84//
69// Helper macros. 85// Helper macros.