author     Bad Diode <bd@badd10de.dev>  2021-10-09 11:45:54 +0200
committer  Bad Diode <bd@badd10de.dev>  2021-10-09 11:45:54 +0200
commit     aa5db9c6d13f353cfb839f732eb81d02d74fc1b7 (patch)
tree       d3e352e3e49e70804d74e36ab54008fc4ef3a65c
parent     33c9c0cd69f6a3f5954222704755771f03c2ea04 (diff)
download   bdl-aa5db9c6d13f353cfb839f732eb81d02d74fc1b7.tar.gz
           bdl-aa5db9c6d13f353cfb839f732eb81d02d74fc1b7.zip
Refactor tokenizer/parser to assign/use token types
-rwxr-xr-x  src/bootstrap/main.c | 294
1 file changed, 170 insertions(+), 124 deletions(-)
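
Note: this patch replaces the tokenizer's raw StringView tokens with typed Token values, classified by the new find_token_type() helper. The standalone sketch below mirrors that classification rule so it can be tried outside the repo; the StringView struct and sv_equal() here are minimal stand-ins for helpers already defined in main.c (their real definitions may differ), so treat it as an illustration rather than code from this commit.

#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Minimal stand-ins for helpers defined elsewhere in main.c. */
typedef struct StringView {
    char *start;
    size_t n;
} StringView;

typedef enum TokenType {
    TOKEN_UNKNOWN = 0,
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_FIXNUM,
    TOKEN_SYMBOL,
    TOKEN_BOOL,
    TOKEN_STRING,
} TokenType;

#define TRUE_TOKEN (StringView){"true", 4}
#define FALSE_TOKEN (StringView){"false", 5}

static bool
sv_equal(StringView a, StringView b) {
    return a.n == b.n && memcmp(a.start, b.start, a.n) == 0;
}

/* Same rule as the patched find_token_type(): all digits (optionally with a
 * leading '-') is a fixnum, "true"/"false" is a bool, anything else is a
 * symbol. Parens and strings are tagged directly by the tokenizer. */
static TokenType
find_token_type(StringView value) {
    bool is_fixnum = true;
    for (size_t i = 0; i < value.n; i++) {
        char c = value.start[i];
        if (i == 0 && c == '-' && value.n > 1) {
            continue;
        }
        if (!isdigit((unsigned char)c)) {
            is_fixnum = false;
            break;
        }
    }
    if (is_fixnum) {
        return TOKEN_FIXNUM;
    }
    if (sv_equal(value, TRUE_TOKEN) || sv_equal(value, FALSE_TOKEN)) {
        return TOKEN_BOOL;
    }
    return TOKEN_SYMBOL;
}

int
main(void) {
    char *lexemes[] = {"-42", "true", "my-symbol", "12three"};
    for (size_t i = 0; i < sizeof(lexemes) / sizeof(lexemes[0]); i++) {
        StringView sv = {lexemes[i], strlen(lexemes[i])};
        printf("%-10s -> TokenType %d\n", lexemes[i], find_token_type(sv));
    }
    return 0;
}
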
diff --git a/src/bootstrap/main.c b/src/bootstrap/main.c
index 3c5663a..65a61f4 100755
--- a/src/bootstrap/main.c
+++ b/src/bootstrap/main.c
@@ -60,21 +60,65 @@ read_line(void) {
     return (StringView){.start = (char *)&readline_buf, .n = n};
 }
 
+typedef enum TokenType {
+    TOKEN_UNKNOWN = 0,
+    TOKEN_LPAREN,
+    TOKEN_RPAREN,
+    TOKEN_FIXNUM,
+    TOKEN_SYMBOL,
+    TOKEN_BOOL,
+    TOKEN_STRING,
+} TokenType;
+
+typedef struct Token {
+    TokenType type;
+    StringView value;
+} Token;
+
 typedef struct Tokens {
-    StringView *start;
+    Token *start;
     size_t n;
 } Tokens;
 
+#define TRUE_TOKEN (StringView){"true", 4}
+#define FALSE_TOKEN (StringView){"false", 5}
+#define LPAREN_TOKEN (StringView){"(", 1}
+#define RPAREN_TOKEN (StringView){")", 1}
+
+TokenType
+find_token_type(StringView value) {
+    bool is_fixnum = true;
+    for (size_t i = 0; i < value.n; i++) {
+        char c = value.start[i];
+        if (i == 0 && c == '-' && value.n > 1) {
+            continue;
+        }
+        if (!isdigit(c)) {
+            is_fixnum = false;
+            break;
+        }
+    }
+    if (is_fixnum) {
+        return TOKEN_FIXNUM;
+    }
+
+    if (sv_equal(value, TRUE_TOKEN) || sv_equal(value, FALSE_TOKEN)) {
+        return TOKEN_BOOL;
+    }
+
+    return TOKEN_SYMBOL;
+}
+
 Tokens
 tokenize(StringView sv) {
     // NOTE: Not allocating any memory for now, but we are limited by a maximum
     // number of tokens we can process.
     #define TOKENS_BUF_SIZE 1024
-    static StringView tokens_buf[TOKENS_BUF_SIZE];
+    static Token tokens_buf[TOKENS_BUF_SIZE];
 
     // Clear buffer.
     for (size_t i = 0; i < TOKENS_BUF_SIZE; i++) {
-        tokens_buf[i] = (StringView){0};
+        tokens_buf[i] = (Token){0};
     }
 
     size_t n = 0;
@@ -88,11 +132,15 @@ tokenize(StringView sv) {
             case '\t':
             case '\v': {
                 if (token_n != 0) {
-                    // Push token.
-                    tokens_buf[n++] = (StringView){
-                        .start = &sv.start[i - token_n],
-                        .n = token_n,
+                    Token token = (Token){
+                        .type = TOKEN_UNKNOWN,
+                        .value = (StringView){
+                            .start = &sv.start[i - token_n],
+                            .n = token_n,
+                        }
                     };
+                    token.type = find_token_type(token.value);
+                    tokens_buf[n++] = token;
                     token_n = 0;
                 }
                 continue;
@@ -117,19 +165,14 @@ tokenize(StringView sv) {
                     string_end++;
                 }
 
-                // Push string token.
-                tokens_buf[n++] = (StringView){
-                    .start = &sv.start[string_start - 1],
-                    .n = 1,
-                };
-                tokens_buf[n++] = (StringView){
-                    .start = &sv.start[string_start],
-                    .n = string_end - string_start,
-                };
-                tokens_buf[n++] = (StringView){
-                    .start = &sv.start[string_end],
-                    .n = 1,
+                Token token = (Token){
+                    .type = TOKEN_STRING,
+                    .value = (StringView){
+                        .start = &sv.start[string_start],
+                        .n = string_end - string_start,
+                    }
                 };
+                tokens_buf[n++] = token;
                 token_n = 0;
                 i += string_end - string_start + 1;
             } break;
@@ -146,11 +189,12 @@ tokenize(StringView sv) {
                     fprintf(stderr, "error: lparen delimiter within symbol name\n");
                     return (Tokens){0};
                 }
-                // Push paren token.
-                tokens_buf[n++] = (StringView){
-                    .start = &sv.start[i],
-                    .n = 1,
+
+                Token token = (Token){
+                    .type = TOKEN_LPAREN,
+                    .value = LPAREN_TOKEN,
                 };
+                tokens_buf[n++] = token;
             } break;
             case ')': {
                 if ((i + 1) < sv.n) {
@@ -163,20 +207,27 @@ tokenize(StringView sv) {
 
                 if (token_n != 0) {
                     // Push previous token.
-                    tokens_buf[n++] = (StringView){
-                        .start = &sv.start[i - token_n],
-                        .n = token_n,
+                    Token token = (Token){
+                        .type = TOKEN_UNKNOWN,
+                        .value = (StringView){
+                            .start = &sv.start[i - token_n],
+                            .n = token_n,
+                        }
                     };
+                    token.type = find_token_type(token.value);
+                    tokens_buf[n++] = token;
                     token_n = 0;
                 }
 
-                // Push paren token.
-                tokens_buf[n++] = (StringView){
-                    .start = &sv.start[i],
-                    .n = 1,
+                Token token = (Token){
+                    .type = TOKEN_RPAREN,
+                    .value = RPAREN_TOKEN,
                 };
+                tokens_buf[n++] = token;
+            } break;
+            case EOF: {
+                break;
             } break;
-            // TODO: Handle double quotes and escaped quotes.
             default: {
                 token_n++;
             } break;
@@ -184,21 +235,26 @@ tokenize(StringView sv) {
     }
     if (token_n != 0) {
         // End of line encountered.
-        tokens_buf[n++] = (StringView){
-            .start = &sv.start[sv.n - token_n],
-            .n = token_n,
+        Token token = (Token){
+            .type = TOKEN_UNKNOWN,
+            .value = (StringView){
+                .start = &sv.start[sv.n - token_n],
+                .n = token_n,
+            }
         };
+        token.type = find_token_type(token.value);
+        tokens_buf[n++] = token;
     }
 
-    return (Tokens){.start = (StringView *)&tokens_buf, .n = n};
+    return (Tokens){.start = (Token *)&tokens_buf, .n = n};
 }
 
-StringView *
+Token *
 consume_token(Tokens *tokens) {
     if (tokens->n == 0) {
         return NULL;
     }
-    StringView *ret = tokens->start;
+    Token *ret = tokens->start;
     tokens->start = &tokens->start[1];
     tokens->n--;
     return ret;
@@ -250,7 +306,9 @@ typedef struct Object {
 // Singletons.
 //
 
-Object *empty_list;
+Object *obj_nil;
+Object *obj_true;
+Object *obj_false;
 
 //
 // Environment.
@@ -330,7 +388,7 @@ make_procedure(Object *(*proc)(struct Object *args)) {
 }
 
 Object *
-cons(Object *car, Object *cdr) {
+make_pair(Object *car, Object *cdr) {
     Object *obj = malloc(sizeof(Object));
     obj->type = OBJ_TYPE_PAIR;
     obj->car = car;
@@ -338,98 +396,68 @@ cons(Object *car, Object *cdr) {
     return obj;
 }
 
-bool
-token_is_fixnum(StringView token) {
-    for (size_t i = 0; i < token.n; i++) {
-        char c = token.start[i];
-        if (i == 0 && c == '-' && token.n > 1) {
-            continue;
-        }
-        if (!isdigit(c)) {
-            return false;
-        }
-    }
-    return true;
-}
-
-#define TRUE_TOKEN (StringView){"true", 4}
-#define FALSE_TOKEN (StringView){"false", 5}
-
 Object *
-build_ast(Tokens *tokens) {
-    // DEBUG: Printing tokens.
-    // printf("N_TOKENS: %ld\n", tokens->n);
-    // for (size_t i = 0; i < tokens->n; i++) {
-    //     printf("TOKEN: ");
-    //     sv_write(tokens->start[i]);
-    //     printf("\tN: %ld", tokens->start[i].n);
-    //     printf("\n");
-    // }
-
-    // TODO: Report error if we haven't consumed all the tokens?
+parse(Tokens *tokens) {
     while (tokens->n > 0) {
-        StringView *token = consume_token(tokens);
+        Token *token = consume_token(tokens);
         if (token == NULL) {
             return NULL;
         }
 
-        // OBJ_TYPE_FIXNUM
-        if (token_is_fixnum(*token)) {
-            // Convert token to fixnum.
-            ssize_t num = 0;
-            int sign = 1;
-            for (size_t i = 0; i < token->n; i++) {
-                char c = token->start[i];
-                if (c == '-') {
-                    sign = -1;
-                    continue;
+        switch (token->type) {
+            case TOKEN_FIXNUM: {
+                ssize_t num = 0;
+                int sign = 1;
+                for (size_t i = 0; i < token->value.n; i++) {
+                    char c = token->value.start[i];
+                    if (c == '-') {
+                        sign = -1;
+                        continue;
+                    }
+                    num = num * 10 + (c - '0');
                 }
-                num = num * 10 + (c - '0');
-            }
-            return make_fixnum(num * sign);
-        }
-
-        // OBJ_TYPE_BOOL
-        if (sv_equal(*token, TRUE_TOKEN)) {
-            return make_boolean(true);
-        }
-
-        if (sv_equal(*token, FALSE_TOKEN)) {
-            return make_boolean(false);
-        }
-
-        // OBJ_TYPE_LIST
-        if (token->start[0] == ')') {
-            return NULL;
-        }
-        if (token->start[0] == '(') {
-            if (tokens->n > 0 && tokens->start[0].start[0] == ')') {
-                return empty_list;
-            }
-            Object *next_obj = build_ast(tokens);
-            if (next_obj == NULL) {
+                return make_fixnum(num * sign);
+            } break;
+            case TOKEN_BOOL: {
+                if (sv_equal(token->value, TRUE_TOKEN)) {
+                    return obj_true;
+                }
+                if (sv_equal(token->value, FALSE_TOKEN)) {
+                    return obj_false;
+                }
+            } break;
+            case TOKEN_RPAREN: {
                 return NULL;
-            }
-            Object *root = cons(next_obj, empty_list);
-            Object *list = root;
-            while (tokens->n > 0 && (next_obj = build_ast(tokens)) != NULL) {
-                list->cdr = cons(next_obj, empty_list);
-                list = list->cdr;
-            }
-            return root;
-        }
+            } break;
+            case TOKEN_LPAREN: {
+                if (tokens->n > 0 && tokens->start[0].type == TOKEN_RPAREN) {
+                    return obj_nil;
+                }
 
-        // OBJ_TYPE_STRING
-        if (token->start[0] == '"') {
-            Object *obj = make_empty_string();
-            token = consume_token(tokens);
-            append_string(obj, *token);
-            consume_token(tokens);
-            return obj;
+                Object *next_obj = parse(tokens);
+                if (next_obj == NULL) {
+                    return NULL;
+                }
+                Object *root = make_pair(next_obj, obj_nil);
+                Object *list = root;
+                while (tokens->n > 0 && (next_obj = parse(tokens)) != NULL) {
+                    list->cdr = make_pair(next_obj, obj_nil);
+                    list = list->cdr;
+                }
+                return root;
+            } break;
+            case TOKEN_STRING: {
+                Object *obj = make_empty_string();
+                append_string(obj, token->value);
+                return obj;
+            } break;
+            case TOKEN_SYMBOL: {
+                return make_symbol(token->value.start, token->value.n);
+            } break;
+            default: {
+                fprintf(stderr, "error: unknown token\n");
+            } break;
         }
-
-        // OBJ_TYPE_SYMBOL
-        return make_symbol(token->start, token->n);
     }
 
     return NULL;
@@ -601,7 +629,9 @@ init(void) {
     }
 
     // Initialize singletons.
-    empty_list = make_empty_list();
+    obj_nil = make_empty_list();
+    obj_true = make_boolean(true);
+    obj_false = make_boolean(false);
 
     // Add primitive functions.
     environment[env_n++] = (EnvSymbol){make_symbol("+", 1), make_procedure(proc_add)};
@@ -639,7 +669,7 @@ eval(Object *root) {
             }
         } break;
         default: {
-            printf("TYPE NOT IMPLEMENTED FOR EVAL.\n");
+            printf("error: can't eval type %d.\n", root->type);
         } break;
     }
 
@@ -654,8 +684,24 @@ main(void) {
         printf(REPL_PROMPT);
         StringView line = read_line();
         Tokens tokens = tokenize(line);
-        Object *ast = build_ast(&tokens);
+#if DEBUG
+        printf("N_TOKENS: %ld\n", tokens.n);
+        for (size_t i = 0; i < tokens.n; i++) {
+            printf("\tTYPE: %3d ", tokens.start[i].type);
+            printf("N: %3ld ", tokens.start[i].value.n);
+            printf("VALUE: ");
+            sv_write(tokens.start[i].value);
+            printf("\n");
+        }
+#endif
+        Object *ast = parse(&tokens);
         if (ast) {
+#if DEBUG
+            printf("AST: ");
+            display(ast);
+            printf("\n");
+            printf("EVAL: ");
+#endif
             display(eval(ast));
             printf("\n");
         }
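
Side note on the new data flow: tokenize() now returns a Tokens slice of typed Token values, and consume_token() pops them off the front by bumping the start pointer, which is what the recursive parse() relies on. The standalone sketch below copies the Token/Tokens/consume_token definitions from this patch and feeds them a hand-built token array in place of real tokenize() output; it is illustrative only, not code from the repository.

#include <stddef.h>
#include <stdio.h>

typedef struct StringView {
    char *start;
    size_t n;
} StringView;

typedef enum TokenType {
    TOKEN_UNKNOWN = 0,
    TOKEN_LPAREN,
    TOKEN_RPAREN,
    TOKEN_FIXNUM,
    TOKEN_SYMBOL,
    TOKEN_BOOL,
    TOKEN_STRING,
} TokenType;

typedef struct Token {
    TokenType type;
    StringView value;
} Token;

typedef struct Tokens {
    Token *start;
    size_t n;
} Tokens;

/* Same pointer-bump consumption as the patched consume_token(). */
Token *
consume_token(Tokens *tokens) {
    if (tokens->n == 0) {
        return NULL;
    }
    Token *ret = tokens->start;
    tokens->start = &tokens->start[1];
    tokens->n--;
    return ret;
}

int
main(void) {
    /* Hand-built stand-in for the output of tokenize() on "(+ 1 2)". */
    Token buf[] = {
        {TOKEN_LPAREN, {"(", 1}},
        {TOKEN_SYMBOL, {"+", 1}},
        {TOKEN_FIXNUM, {"1", 1}},
        {TOKEN_FIXNUM, {"2", 1}},
        {TOKEN_RPAREN, {")", 1}},
    };
    Tokens tokens = {buf, sizeof(buf) / sizeof(buf[0])};

    Token *token;
    while ((token = consume_token(&tokens)) != NULL) {
        printf("type %d: %.*s\n", token->type, (int)token->value.n, token->value.start);
    }
    return 0;
}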