aboutsummaryrefslogtreecommitdiffstats
path: root/src/parser.c
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-10-29 20:12:19 +0200
committerBad Diode <bd@badd10de.dev>2021-10-29 20:12:19 +0200
commit95709acb7f166b21f562ef3fcf8ba7cb5890d28a (patch)
tree5a3629189218d2c123228831348eae694b2a3a8a /src/parser.c
parent5ed73b695e6b463149ab0c9ae3eccb26a4ec5807 (diff)
downloadbdl-95709acb7f166b21f562ef3fcf8ba7cb5890d28a.tar.gz
bdl-95709acb7f166b21f562ef3fcf8ba7cb5890d28a.zip
Deduplicate string/symbols text for fast equality checks
Diffstat (limited to 'src/parser.c')
-rw-r--r--src/parser.c103
1 files changed, 89 insertions, 14 deletions
diff --git a/src/parser.c b/src/parser.c
index 2d70c9d..3c3b1af 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -52,18 +52,48 @@ parse_bool(Token tok) {
52} 52}
53 53
54Object * 54Object *
55parse_string(Token tok) { 55parse_string(Parser *parser, Token tok) {
56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING); 56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING);
57 array_init(ret->text, tok.value.n); 57 array_init(ret->text, tok.value.n);
58 array_insert(ret->text, tok.value.start, tok.value.n); 58 array_insert(ret->text, tok.value.start, tok.value.n);
59
60 // Check if text was already present.
61 ssize_t idx = -1;
62 for (size_t i = 0; i < array_size(parser->strings); i++) {
63 if (object_equal(ret, parser->strings[i])) {
64 idx = i;
65 break;
66 }
67 }
68 if (idx != -1) {
69 array_free(ret->text);
70 ret->text = parser->strings[idx]->text;
71 } else {
72 array_push(parser->strings, ret);
73 }
59 return ret; 74 return ret;
60} 75}
61 76
62Object * 77Object *
63parse_symbol(Token tok) { 78parse_symbol(Parser *parser, Token tok) {
64 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); 79 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL);
65 array_init(ret->text, tok.value.n); 80 array_init(ret->text, tok.value.n);
66 array_insert(ret->text, tok.value.start, tok.value.n); 81 array_insert(ret->text, tok.value.start, tok.value.n);
82
83 // Check if text was already present.
84 ssize_t idx = -1;
85 for (size_t i = 0; i < array_size(parser->symbols); i++) {
86 if (object_equal(ret, parser->symbols[i])) {
87 idx = i;
88 break;
89 }
90 }
91 if (idx != -1) {
92 array_free(ret->text);
93 ret->text = parser->symbols[idx]->text;
94 } else {
95 array_push(parser->symbols, ret);
96 }
67 return ret; 97 return ret;
68} 98}
69 99
@@ -133,10 +163,10 @@ parse_tree(Parser *parser, Errors *errors) {
133 return parse_list(parser, errors); 163 return parse_list(parser, errors);
134 } break; 164 } break;
135 case TOKEN_STRING: { 165 case TOKEN_STRING: {
136 return parse_string(tok); 166 return parse_string(parser, tok);
137 } break; 167 } break;
138 case TOKEN_SYMBOL: { 168 case TOKEN_SYMBOL: {
139 return parse_symbol(tok); 169 return parse_symbol(parser, tok);
140 } break; 170 } break;
141 case TOKEN_NIL: { 171 case TOKEN_NIL: {
142 return object_alloc(tok, OBJ_TYPE_NIL); 172 return object_alloc(tok, OBJ_TYPE_NIL);
@@ -154,7 +184,7 @@ parse_tree(Parser *parser, Errors *errors) {
154 return NULL; 184 return NULL;
155} 185}
156 186
157Root * 187ParserResults
158parse(Token *tokens, Errors *errors) { 188parse(Token *tokens, Errors *errors) {
159 Root *roots = NULL; 189 Root *roots = NULL;
160 array_init(roots, 0); 190 array_init(roots, 0);
@@ -162,6 +192,8 @@ parse(Token *tokens, Errors *errors) {
162 .tokens = tokens, 192 .tokens = tokens,
163 .current = 0, 193 .current = 0,
164 }; 194 };
195 array_init(parser.symbols, 0);
196 array_init(parser.strings, 0);
165 while (has_next_token(&parser)) { 197 while (has_next_token(&parser)) {
166 Object *root = parse_tree(&parser, errors); 198 Object *root = parse_tree(&parser, errors);
167 OBJ_PRINT(root); 199 OBJ_PRINT(root);
@@ -170,7 +202,11 @@ parse(Token *tokens, Errors *errors) {
170 } 202 }
171 array_push(roots, root); 203 array_push(roots, root);
172 } 204 }
173 return roots; 205 return (ParserResults){
206 .roots = roots,
207 .symbols = parser.symbols,
208 .strings = parser.strings,
209 };
174} 210}
175 211
176Object * 212Object *
@@ -187,9 +223,6 @@ object_free(Object *node) {
187 if (node == NULL) { 223 if (node == NULL) {
188 return; 224 return;
189 } 225 }
190 if (IS_STRING(node) || IS_SYMBOL(node)) {
191 array_free(node->text);
192 }
193 if (IS_PAIR(node)) { 226 if (IS_PAIR(node)) {
194 object_free(node->car); 227 object_free(node->car);
195 object_free(node->cdr); 228 object_free(node->cdr);
@@ -198,12 +231,24 @@ object_free(Object *node) {
198} 231}
199 232
200void 233void
201free_roots(Root *roots) { 234free_parser_results(ParserResults *pr) {
202 if (roots != NULL) { 235 if (pr->symbols != NULL) {
203 for (size_t i = 0; i < array_size(roots); i++) { 236 for (size_t i = 0; i < array_size(pr->symbols); i++) {
204 object_free(roots[i]); 237 array_free(pr->symbols[i]->text);
205 } 238 }
206 array_free(roots); 239 array_free(pr->symbols);
240 }
241 if (pr->strings != NULL) {
242 for (size_t i = 0; i < array_size(pr->strings); i++) {
243 array_free(pr->strings[i]->text);
244 }
245 array_free(pr->strings);
246 }
247 if (pr->roots != NULL) {
248 for (size_t i = 0; i < array_size(pr->roots); i++) {
249 object_free(pr->roots[i]);
250 }
251 array_free(pr->roots);
207 } 252 }
208} 253}
209 254
@@ -255,3 +300,33 @@ object_display(Object *obj) {
255 } 300 }
256 return; 301 return;
257} 302}
303
304bool
305object_equal(Object *a, Object *b) {
306 if (a->type != b->type) {
307 return false;
308 }
309 switch (a->type) {
310 case OBJ_TYPE_TRUE:
311 case OBJ_TYPE_FALSE: {
312 return true;
313 } break;
314 case OBJ_TYPE_FIXNUM: {
315 return a->fixnum == b->fixnum;
316 } break;
317 case OBJ_TYPE_SYMBOL:
318 case OBJ_TYPE_STRING: {
319 if (array_size(a->text) != array_size(b->text)) {
320 return false;
321 }
322 for (size_t i = 0; i < array_size(a->text); i++) {
323 if (a->text[i] != b->text[i]) {
324 return false;
325 }
326 }
327 return true;
328 } break;
329 default: break;
330 }
331 return a == b;
332}