aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2022-02-12 19:06:09 +0100
committerBad Diode <bd@badd10de.dev>2022-02-12 19:06:09 +0100
commitfa32ad3224b3e362e5f79eee8785334f4bebdbc8 (patch)
tree4c5cb46baa8dc010921d755f157d6e23db9ce5d0 /src
parentc4765a539ee01625dd310a02f0be16ec9a64e2e4 (diff)
downloadbdl-fa32ad3224b3e362e5f79eee8785334f4bebdbc8.tar.gz
bdl-fa32ad3224b3e362e5f79eee8785334f4bebdbc8.zip
Add boilerplate for parser
Diffstat (limited to 'src')
-rw-r--r--src/lexer.c62
-rw-r--r--src/main.c11
-rw-r--r--src/parser.c968
-rw-r--r--src/parser.h188
4 files changed, 154 insertions, 1075 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 5175b1c..f63ff4f 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -141,67 +141,6 @@ is_delimiter(char c) {
141 return false; 141 return false;
142} 142}
143 143
144size_t
145scan_number_token(Scanner *scanner) {
146 // TODO: This looks like more a parsing problem than lexer,
147 // consider moving it there. If starts with `-` and there is no
148 // delimiter after, or if it starts with a number, it is
149 // TOKEN_NUMBER.
150 char first = scan_next(scanner);
151 char second = scan_peek(scanner);
152 size_t n = 1;
153 if (first == '0' && !is_delimiter(second)) {
154 if (second == 'x') {
155 // Hex constant.
156 scan_next(scanner);
157 n++;
158 if (is_delimiter(scan_peek(scanner))) {
159 return 0;
160 }
161 while (!is_delimiter(scan_peek(scanner))) {
162 char c = scan_next(scanner);
163 if (!(c >= '0' && c <= '9') &&
164 !(c >= 'a' && c <= 'f') &&
165 !(c >= 'A' && c <= 'F')) {
166 return 0;
167 }
168 n++;
169 }
170 return n;
171 } else if (second == 'b') {
172 // Binary constant.
173 scan_next(scanner);
174 n++;
175 if (is_delimiter(scan_peek(scanner))) {
176 return 0;
177 }
178 while (!is_delimiter(scan_peek(scanner))) {
179 char c = scan_next(scanner);
180 if (!(c == '0' || c == '1')) {
181 return 0;
182 }
183 n++;
184 }
185 }
186 }
187
188 // Decimal number or floating point.
189 bool has_dot = false;
190 while (!is_delimiter(scan_peek(scanner))) {
191 char c = scan_next(scanner);
192 if (c == '.') {
193 if (has_dot) {
194 return 0;
195 }
196 has_dot = true;
197 } else if (!(c >= '0' && c <= '9')) {
198 return 0;
199 }
200 n++;
201 }
202 return n;
203}
204
205TokenType 144TokenType
206find_token_type(const StringView value) { 145find_token_type(const StringView value) {
207 for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { 146 for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) {
@@ -285,6 +224,7 @@ tokenize(const StringView *sv) {
285 } 224 }
286 size_t n = 1; 225 size_t n = 1;
287 bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); 226 bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner));
227 is_number = c == '+' && !is_delimiter(scan_peek(&scanner));
288 is_number = is_number || (c >= '0' && c <= '9'); 228 is_number = is_number || (c >= '0' && c <= '9');
289 if (is_number) { 229 if (is_number) {
290 while (!is_delimiter(scan_peek(&scanner))) { 230 while (!is_delimiter(scan_peek(&scanner))) {
diff --git a/src/main.c b/src/main.c
index 31fb5da..7a24895 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,7 +7,7 @@
7#include "string_view.c" 7#include "string_view.c"
8#include "errors.c" 8#include "errors.c"
9#include "lexer.c" 9#include "lexer.c"
10// #include "parser.c" 10#include "parser.c"
11// #include "ir.h" 11// #include "ir.h"
12// #include "compiler.h" 12// #include "compiler.h"
13 13
@@ -25,11 +25,14 @@ void
25process_source(const StringView *source, const char *file_name) { 25process_source(const StringView *source, const char *file_name) {
26 // Read tokens. 26 // Read tokens.
27 Token *tokens = tokenize(source); 27 Token *tokens = tokenize(source);
28 print_tokens(tokens); 28 // print_tokens(tokens);
29 check_errors(file_name);
30
31 // Parser.
32 parse(tokens);
33 // print_program(program);
29 check_errors(file_name); 34 check_errors(file_name);
30 35
31 // // Parser.
32 // Program program = parse(tokens, &errors);
33 // if (errors.n != 0) { 36 // if (errors.n != 0) {
34 // report_errors(&errors, file_name); 37 // report_errors(&errors, file_name);
35 // free_objects(); 38 // free_objects();
diff --git a/src/parser.c b/src/parser.c
index a5e2b42..dfc3e56 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -1,902 +1,150 @@
1#include "parser.h" 1#include "parser.h"
2#include "darray.h" 2#include "darray.h"
3 3
4static Object **objects = NULL;
5static Root *roots = NULL;
6static Environment **environments = NULL;
7
8// Builtin procedures.
9static Object builtins[] = {
10 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_ADD, .builtin_text = STRING("+") },
11 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_SUB, .builtin_text = STRING("-") },
12 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MUL, .builtin_text = STRING("*") },
13 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_DIV, .builtin_text = STRING("/") },
14 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MOD, .builtin_text = STRING("%") },
15 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_EQ, .builtin_text = STRING("=") },
16 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LT, .builtin_text = STRING("<") },
17 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GT, .builtin_text = STRING(">") },
18 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LE, .builtin_text = STRING("<=") },
19 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GE, .builtin_text = STRING(">=") },
20 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_NOT, .builtin_text = STRING("not") },
21 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_AND, .builtin_text = STRING("and") },
22 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_OR, .builtin_text = STRING("or") },
23 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_NIL, .builtin_text = STRING("nil?") },
24 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_ZERO, .builtin_text = STRING("zero?") },
25 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_FIXNUM, .builtin_text = STRING("fixnum?") },
26 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_BOOL, .builtin_text = STRING("bool?") },
27 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_PRINT, .builtin_text = STRING("print") },
28 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CONS, .builtin_text = STRING("cons") },
29 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CAR, .builtin_text = STRING("car") },
30 { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CDR, .builtin_text = STRING("cdr") },
31};
32
33// Static singleton objects.
34static Object obj_nil = { .type = OBJ_TYPE_NIL };
35static Object obj_true = { .type = OBJ_TYPE_TRUE };
36static Object obj_false = { .type = OBJ_TYPE_FALSE };
37
38Token
39peek_token(const Parser *parser) {
40 if (parser->current >= array_size(parser->tokens)) {
41 return parser->tokens[array_size(parser->tokens) - 1];
42 }
43 return parser->tokens[parser->current];
44}
45
46Token 4Token
47next_token(Parser *parser) { 5next_token(Parser *parser) {
48 if (parser->current >= array_size(parser->tokens)) {
49 return parser->tokens[array_size(parser->tokens) - 1];
50 }
51 return parser->tokens[parser->current++]; 6 return parser->tokens[parser->current++];
52} 7}
53 8
54Token 9Token
55previous_token(Parser *parser) { 10peek_token(Parser *parser) {
56 return parser->tokens[parser->current - 1]; 11 return parser->tokens[parser->current];
57}
58
59Token
60rewind_token(Parser *parser) {
61 return parser->tokens[--parser->current];
62} 12}
63 13
64bool 14bool
65has_next_token(const Parser *parser) { 15has_next(Parser *parser) {
66 return parser->current < array_size(parser->tokens) 16 return parser->current < array_size(parser->tokens);
67 && peek_token(parser).type != TOKEN_EOF;
68}
69
70Object *
71parse_fixnum(Token tok) {
72 ssize_t num = 0;
73 ssize_t sign = 1;
74 for (size_t i = 0; i < tok.value.n; i++) {
75 char c = tok.value.start[i];
76 if (c == '-') {
77 sign = -1;
78 continue;
79 }
80 num = num * 10 + (c - '0');
81 }
82 Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM);
83 ret->fixnum = num * sign;
84 return ret;
85}
86
87Object *
88parse_bool(Token tok) {
89 if (tok.type == TOKEN_TRUE) {
90 return &obj_true;
91 }
92 return &obj_false;
93}
94
95Object *
96parse_string(Token tok) {
97 Object *ret = object_alloc(tok, OBJ_TYPE_STRING);
98 ret->text = tok.value;
99 return ret;
100}
101
102Object *
103parse_symbol(Token tok) {
104 // Check if symbol is a builtin procedure.
105 size_t n_builtins = sizeof(builtins) / sizeof(Object);
106 for (size_t i = 0; i < n_builtins; ++i) {
107 if (sv_equal(&tok.value, &builtins[i].builtin_text)) {
108 return &builtins[i];
109 }
110 }
111
112 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL);
113 ret->text = tok.value;
114 return ret;
115} 17}
116 18
117Object * 19Node
118parse_lambda(Parser *parser, Errors *errors) { 20parse_number(Parser *parser) {
119 Token start = next_token(parser);
120 Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA);
121 array_init(lambda->params, 0);
122 array_init(lambda->body, 0);
123 lambda->env = NULL;
124
125 // Parse parameters.
126 Token tok = next_token(parser); 21 Token tok = next_token(parser);
127 if (tok.type == TOKEN_LPAREN) { 22 // TODO: do the dance.
128 while (has_next_token(parser)) { 23 // if error:
129 Token tok = next_token(parser); 24 // return (Node){.type = NODE_ERR};
130 if (tok.type == TOKEN_RPAREN) { 25
131 break; 26 // size_t
132 } 27 // scan_number_token(Scanner *scanner) {
133 if (tok.type != TOKEN_SYMBOL) { 28 // // TODO: This looks like more a parsing problem than lexer,
134 error_push(errors, (Error){ 29 // // consider moving it there. If starts with `-` and there is no
135 .type = ERR_TYPE_PARSER, 30 // // delimiter after, or if it starts with a number, it is
136 .value = ERR_WRONG_ARG_TYPE, 31 // // TOKEN_NUMBER.
137 .line = tok.line, 32 // char first = scan_next(scanner);
138 .col = tok.col, 33 // char second = scan_peek(scanner);
139 }); 34 // size_t n = 1;
140 } 35 // if (first == '0' && !is_delimiter(second)) {
141 Object *symbol = parse_symbol(tok); 36 // if (second == 'x') {
142 array_push(lambda->params, symbol); 37 // // Hex constant.
143 } 38 // scan_next(scanner);
144 } else if (tok.type != TOKEN_NIL) { 39 // n++;
145 error_push(errors, (Error){ 40 // if (is_delimiter(scan_peek(scanner))) {
146 .type = ERR_TYPE_PARSER, 41 // return 0;
147 .value = ERR_WRONG_ARG_TYPE, 42 // }
148 .line = tok.line, 43 // while (!is_delimiter(scan_peek(scanner))) {
149 .col = tok.col, 44 // char c = scan_next(scanner);
150 }); 45 // if (!(c >= '0' && c <= '9') &&
151 return NULL; 46 // !(c >= 'a' && c <= 'f') &&
152 } 47 // !(c >= 'A' && c <= 'F')) {
153 48 // return 0;
154 // Parse body. 49 // }
155 bool done = false; 50 // n++;
156 while (has_next_token(parser)) { 51 // }
157 Token tok = peek_token(parser); 52 // return n;
158 if (tok.type == TOKEN_RPAREN) { 53 // } else if (second == 'b') {
159 next_token(parser); 54 // // Binary constant.
160 done = true; 55 // scan_next(scanner);
161 break; 56 // n++;
162 } 57 // if (is_delimiter(scan_peek(scanner))) {
163 Object *expr = parse_tree(parser, errors); 58 // return 0;
164 array_push(lambda->body, expr); 59 // }
165 } 60 // while (!is_delimiter(scan_peek(scanner))) {
166 if (!done) { 61 // char c = scan_next(scanner);
167 error_push(errors, (Error){ 62 // if (!(c == '0' || c == '1')) {
168 .type = ERR_TYPE_PARSER, 63 // return 0;
169 .value = ERR_UNBALANCED_PAREN, 64 // }
170 .line = tok.line, 65 // n++;
171 .col = tok.col, 66 // }
172 }); 67 // }
173 return NULL; 68 // }
174 } 69
175 if (array_size(lambda->body) == 0) { 70 // // Decimal number or floating point.
176 error_push(errors, (Error){ 71 // bool has_dot = false;
177 .type = ERR_TYPE_PARSER, 72 // while (!is_delimiter(scan_peek(scanner))) {
178 .value = ERR_NOT_ENOUGH_ARGS, 73 // char c = scan_next(scanner);
179 .line = start.line, 74 // if (c == '.') {
180 .col = start.col, 75 // if (has_dot) {
181 }); 76 // return 0;
182 return NULL; 77 // }
183 } 78 // has_dot = true;
184 return lambda; 79 // } else if (!(c >= '0' && c <= '9')) {
185} 80 // return 0;
186 81 // }
187Object * 82 // n++;
188parse_if(Parser *parser, Errors *errors) { 83 // }
189 Token start = next_token(parser); 84 // return n;
190 Object *ret = object_alloc(start, OBJ_TYPE_IF); 85 // }
191 86 return (Node){.type = NODE_NUMBER, .string = 53};
192 Token tok = peek_token(parser); 87}
193 if (tok.type == TOKEN_RPAREN) { 88
194 error_push(errors, (Error){ 89Node
195 .type = ERR_TYPE_PARSER, 90parse_next(Parser *parser) {
196 .value = ERR_NOT_ENOUGH_ARGS, 91 if (!has_next(parser)) {
197 .line = tok.line, 92 return;
198 .col = tok.col,
199 });
200 return NULL;
201 }
202 ret->condition = parse_tree(parser, errors);
203
204 tok = peek_token(parser);
205 if (tok.type == TOKEN_RPAREN) {
206 error_push(errors, (Error){
207 .type = ERR_TYPE_PARSER,
208 .value = ERR_NOT_ENOUGH_ARGS,
209 .line = tok.line,
210 .col = tok.col,
211 });
212 return NULL;
213 }
214 ret->expr_true = parse_tree(parser, errors);
215
216 // Optional else expression.
217 tok = peek_token(parser);
218 if (tok.type == TOKEN_RPAREN) {
219 next_token(parser);
220 ret->expr_false = NULL;
221 return ret;
222 }
223 ret->expr_false = parse_tree(parser, errors);
224
225 tok = peek_token(parser);
226 if (tok.type == TOKEN_EOF) {
227 error_push(errors, (Error){
228 .type = ERR_TYPE_PARSER,
229 .value = ERR_UNBALANCED_PAREN,
230 .line = tok.line,
231 .col = tok.col,
232 });
233 return NULL;
234 }
235 if (tok.type != TOKEN_RPAREN) {
236 error_push(errors, (Error){
237 .type = ERR_TYPE_PARSER,
238 .value = ERR_TOO_MANY_ARGS,
239 .line = tok.line,
240 .col = tok.col,
241 });
242 return NULL;
243 }
244 next_token(parser);
245 return ret;
246}
247
248Object *
249parse_var(Parser *parser, Errors *errors) {
250 Token start = next_token(parser);
251 ObjectType type = start.type == TOKEN_DEF ? OBJ_TYPE_DEF : OBJ_TYPE_SET;
252 Object *ret = object_alloc(start, type);
253
254 // Variable name.
255 Token tok = peek_token(parser);
256 if (tok.type == TOKEN_RPAREN) {
257 error_push(errors, (Error){
258 .type = ERR_TYPE_PARSER,
259 .value = ERR_NOT_ENOUGH_ARGS,
260 .line = tok.line,
261 .col = tok.col,
262 });
263 return NULL;
264 }
265 if (tok.type != TOKEN_SYMBOL) {
266 error_push(errors, (Error){
267 .type = ERR_TYPE_PARSER,
268 .value = ERR_WRONG_ARG_TYPE,
269 .line = tok.line,
270 .col = tok.col,
271 });
272 return NULL;
273 }
274 ret->var_name = parse_tree(parser, errors);
275
276 // Variable value (expression).
277 tok = peek_token(parser);
278 if (tok.type == TOKEN_RPAREN) {
279 error_push(errors, (Error){
280 .type = ERR_TYPE_PARSER,
281 .value = ERR_NOT_ENOUGH_ARGS,
282 .line = tok.line,
283 .col = tok.col,
284 });
285 return NULL;
286 }
287 ret->var_expr = parse_tree(parser, errors);
288
289 tok = peek_token(parser);
290 if (tok.type == TOKEN_EOF) {
291 error_push(errors, (Error){
292 .type = ERR_TYPE_PARSER,
293 .value = ERR_UNBALANCED_PAREN,
294 .line = tok.line,
295 .col = tok.col,
296 });
297 return NULL;
298 }
299 if (tok.type != TOKEN_RPAREN) {
300 error_push(errors, (Error){
301 .type = ERR_TYPE_PARSER,
302 .value = ERR_TOO_MANY_ARGS,
303 .line = tok.line,
304 .col = tok.col,
305 });
306 return NULL;
307 }
308 next_token(parser);
309 return ret;
310}
311
312Object *
313parse_fun(Parser *parser, Errors *errors) {
314 Token start = next_token(parser);
315 Object *ret = object_alloc(start, OBJ_TYPE_DEF);
316
317 // Variable name.
318 Token tok = peek_token(parser);
319 if (tok.type == TOKEN_RPAREN) {
320 error_push(errors, (Error){
321 .type = ERR_TYPE_PARSER,
322 .value = ERR_NOT_ENOUGH_ARGS,
323 .line = tok.line,
324 .col = tok.col,
325 });
326 return NULL;
327 }
328 if (tok.type != TOKEN_SYMBOL) {
329 error_push(errors, (Error){
330 .type = ERR_TYPE_PARSER,
331 .value = ERR_WRONG_ARG_TYPE,
332 .line = tok.line,
333 .col = tok.col,
334 });
335 return NULL;
336 }
337 ret->var_name = parse_tree(parser, errors);
338
339 // Variable value (expression).
340 rewind_token(parser);
341 ret->var_expr = parse_lambda(parser, errors);
342 return ret;
343}
344
345Object *
346parse_list(Parser *parser, Errors *errors) {
347 if (errors->n != 0) {
348 return NULL;
349 } 93 }
350 94
351 Token tok = peek_token(parser); 95 Token tok = peek_token(parser);
352 switch (tok.type) { 96 switch (tok.type) {
353 case TOKEN_RPAREN: { 97 case TOKEN_NUMBER: {
354 error_push(errors, (Error){ 98 return parse_number(parser);
355 .type = ERR_TYPE_PARSER,
356 .value = ERR_UNBALANCED_PAREN,
357 .line = tok.line,
358 .col = tok.col,
359 });
360 return NULL;
361 } break;
362 case TOKEN_LAMBDA: { return parse_lambda(parser, errors); } break;
363 case TOKEN_IF: { return parse_if(parser, errors); } break;
364 case TOKEN_DEF: { return parse_var(parser, errors); } break;
365 case TOKEN_SET: { return parse_var(parser, errors); } break;
366 case TOKEN_FUN: { return parse_fun(parser, errors); } break;
367 default: break;
368 }
369
370 Token start = previous_token(parser);
371 Object *root = object_alloc(start, OBJ_TYPE_PAIR);
372 root->head = NULL;
373 root->tail = NULL;
374 root->n_elems = 0;
375 Object *current = root;
376 bool first = true;
377 while (has_next_token(parser)) {
378 Token tok = peek_token(parser);
379 current->head = parse_tree(parser, errors);
380 if (errors->n != 0 || current->head == NULL) {
381 return NULL;
382 }
383
384 if (first) {
385 if (!IS_SYMBOL(current->head) &&
386 !IS_LAMBDA(current->head) &&
387 !IS_BUILTIN(current->head)) {
388 error_push(errors, (Error){
389 .type = ERR_TYPE_PARSER,
390 .value = ERR_NOT_CALLABLE,
391 .line = tok.line,
392 .col = tok.col,
393 });
394 return NULL;
395 }
396 first = false;
397 }
398
399 tok = peek_token(parser);
400 if (tok.type == TOKEN_RPAREN) {
401 next_token(parser);
402 return root;
403 }
404
405 Object *next = object_alloc(start, OBJ_TYPE_PAIR);
406 next->head = NULL;
407 next->tail = NULL;
408 current->tail = next;
409 current = current->tail;
410 root->n_elems++;
411 }
412 error_push(errors, (Error){
413 .type = ERR_TYPE_PARSER,
414 .value = ERR_UNBALANCED_PAREN,
415 .line = start.line,
416 .col = start.col,
417 });
418 return NULL;
419}
420
421Object *
422parse_tree(Parser *parser, Errors *errors) {
423 Token tok = next_token(parser);
424 if (errors->n != 0) {
425 return NULL;
426 }
427 switch (tok.type) {
428 case TOKEN_FIXNUM: {
429 return parse_fixnum(tok);
430 } break;
431 case TOKEN_TRUE:
432 case TOKEN_FALSE: {
433 return parse_bool(tok);
434 } break;
435 case TOKEN_RPAREN: {
436 error_push(errors, (Error){
437 .type = ERR_TYPE_PARSER,
438 .value = ERR_UNBALANCED_PAREN,
439 .line = tok.line,
440 .col = tok.col,
441 });
442 return NULL;
443 } break;
444 case TOKEN_LPAREN: {
445 return parse_list(parser, errors);
446 } break; 99 } break;
447 case TOKEN_STRING: { 100 case TOKEN_STRING: {
448 return parse_string(tok); 101 // printf("STRING!\n");
449 } break; 102 next_token(parser); // FIXME: <====
450 case TOKEN_SYMBOL: { 103 return (Node){.type = NODE_STRING, .string = tok.value};
451 return parse_symbol(tok); 104 // TODO: parse_string(parser);
452 } break; 105 } break;
453 case TOKEN_NIL: { 106 case TOKEN_LPAREN: {
454 return &obj_nil; 107 // printf("LPAREN OH MY!\n");
108 // TODO: parse_list(parser);
455 } break; 109 } break;
456 default: { 110 default: {
457 break; 111 // printf("OH OHHHH\n");
112 // ...
458 } break; 113 } break;
459 } 114 }
460 return NULL; 115 next_token(parser); // FIXME: <====
461} 116 // TODO: this should be an error
462
463ssize_t
464find_var_index(Object **vars, Object *symbol) {
465 for (size_t i = 0; i < array_size(vars); i++) {
466 if (object_equal(vars[i], symbol)) {
467 return i;
468 }
469 }
470 return -1;
471}
472
473Object *
474symbol_in_env(Environment *env, Object *symbol) {
475 while (env != NULL) {
476 ssize_t idx = find_var_index(env->locals, symbol);
477 if (idx != -1) {
478 return env->local_values[idx];
479 }
480 idx = find_var_index(env->params, symbol);
481 if (idx != -1) {
482 return env->params[idx];
483 }
484 env = env->parent;
485 }
486 return NULL;
487}
488
489void
490insert_local(Environment *env, Object *symbol, Object *value) {
491 ssize_t idx = find_var_index(env->locals, symbol);
492 if (idx != -1) {
493 env->local_values[idx] = value;
494 return;
495 }
496 array_push(env->locals, symbol);
497 array_push(env->local_values, value);
498} 117}
499 118
500void 119void
501insert_params(Environment *env, Object *symbol) { 120print_node(Node node) {
502 if (find_var_index(env->params, symbol) != -1) { 121 switch (node.type) {
503 return; 122 case NODE_NUMBER: {
504 } 123 printf("%ld\n", node.number);
505 array_push(env->params, symbol);
506}
507
508void
509insert_captured(Environment *env, Object *symbol) {
510 if (find_var_index(env->captured, symbol) != -1) {
511 return;
512 }
513 array_push(env->captured, symbol);
514}
515
516void
517semantic_analysis(Environment *env, Object *obj, Errors *errors) {
518 if (obj == NULL || obj->visited) {
519 return;
520 }
521 obj->visited = true;
522 switch (obj->type) {
523 case OBJ_TYPE_SYMBOL: {
524 Object *found = NULL;
525
526 Environment *cur_env = env;
527 while (cur_env != NULL) {
528 ssize_t idx = find_var_index(cur_env->locals, obj);
529 if (idx != -1) {
530 found = cur_env->local_values[idx];
531 if (cur_env != env) {
532 insert_captured(env, obj);
533 }
534 break;
535 }
536 idx = find_var_index(cur_env->params, obj);
537 if (idx != -1) {
538 found = cur_env->params[idx];
539 break;
540 }
541 cur_env = cur_env->parent;
542 }
543
544 // Check if symbol is in other environments.
545 if (found == NULL) {
546 error_push(errors, (Error){
547 .type = ERR_TYPE_PARSER,
548 .value = ERR_SYMBOL_NOT_FOUND,
549 .line = obj->line,
550 .col = obj->col,
551 });
552 return;
553 }
554 semantic_analysis(env, found, errors);
555 } break;
556 case OBJ_TYPE_DEF: {
557 insert_local(env, obj->var_name, obj->var_expr);
558 semantic_analysis(env, obj->var_expr, errors);
559 } break;
560 case OBJ_TYPE_SET: {
561 semantic_analysis(env, obj->var_name, errors);
562 semantic_analysis(env, obj->var_expr, errors);
563 } break;
564 case OBJ_TYPE_IF: {
565 semantic_analysis(env, obj->condition, errors);
566 semantic_analysis(env, obj->expr_true, errors);
567 semantic_analysis(env, obj->expr_false, errors);
568 } break; 124 } break;
569 case OBJ_TYPE_PAIR: { 125 case NODE_STRING: {
570 Object *head = obj->head; 126 sv_write(&node.string);
571 if (IS_SYMBOL(head)) { 127 printf("\n");
572 head = symbol_in_env(env, head);
573 if (head == NULL) {
574 error_push(errors, (Error){
575 .type = ERR_TYPE_PARSER,
576 .value = ERR_SYMBOL_NOT_FOUND,
577 .line = obj->head->line,
578 .col = obj->head->col,
579 });
580 return;
581 }
582 }
583 if (IS_LAMBDA(head)) {
584 if (obj->n_elems != array_size(head->params)) {
585 error_push(errors, (Error){
586 .type = ERR_TYPE_PARSER,
587 .value = ERR_NOT_ENOUGH_ARGS,
588 .line = obj->line,
589 .col = obj->col
590 });
591 return;
592 }
593 }
594 semantic_analysis(env, obj->head, errors);
595 semantic_analysis(env, obj->tail, errors);
596 } break; 128 } break;
597 case OBJ_TYPE_LAMBDA: { 129 default: {
598 // Initialize scope for this lambda. 130 // ...
599 Environment *new_env = env_alloc(env);
600 obj->env = new_env;
601 for (size_t i = 0; i < array_size(obj->params); i++) {
602 insert_params(obj->env, obj->params[i]);
603 }
604 // Used for removing unnecessary statements.
605 Object **new_body = NULL;
606 array_init(new_body, 0);
607 for (size_t i = 0; i < array_size(obj->body); i++) {
608 Object *expr = obj->body[i];
609 if (i != array_size(obj->body) - 1) {
610 if (IS_FIXNUM(expr) ||
611 IS_STRING(expr) ||
612 IS_SYMBOL(expr) ||
613 IS_LAMBDA(expr) ||
614 IS_BOOL(expr) ||
615 IS_NIL(expr)) {
616 continue;
617 }
618 }
619 semantic_analysis(obj->env, expr, errors);
620 array_push(new_body, expr);
621 }
622 array_free(obj->body);
623 obj->body = new_body;
624 } break; 131 } break;
625 default: break;
626 } 132 }
627} 133}
628 134
629Program 135void
630parse(Token *tokens, Errors *errors) { 136parse(Token *tokens) {
631 array_init(roots, 0);
632 array_init(objects, 0);
633 array_init(environments, 0);
634 Parser parser = { 137 Parser parser = {
635 .tokens = tokens, 138 .tokens = tokens,
636 .current = 0, 139 .current = 0,
637 }; 140 };
638 141 while (has_next(&parser)) {
639 // Build initial ASTs. This also ensures the core grammar is correct. 142 Node node = parse_next(&parser);
640 while (has_next_token(&parser)) { 143 if (node.type == NODE_ERR) {
641 Object *root = parse_tree(&parser, errors); 144 return;
642 if (errors->n != 0) {
643 return (Program){0};
644 } 145 }
645 array_push(roots, root); 146 print_node(node);
646 } 147 // Token tok = next_token(&parser);
647 148 // print_token(tok);
648 // Prepare global environment.
649 Environment *env = env_alloc(NULL);
650
651 // Perform semantic analysis:
652 // 1. Populate symbol tables and ensure symbols are in scope when used.
653 // 2. Removing unnecessary expressions.
654 // 3. Verify number of arguments is correct in function calls.
655 Root *final_roots = NULL;
656 array_init(final_roots, 0);
657 for (size_t i = 0; i < array_size(roots); i++) {
658 Object *root = roots[i];
659 if (i != array_size(roots) - 1) {
660 if (IS_FIXNUM(root) ||
661 IS_STRING(root) ||
662 IS_SYMBOL(root) ||
663 IS_LAMBDA(root) ||
664 IS_BOOL(root) ||
665 IS_NIL(root)) {
666 continue;
667 }
668 }
669 array_push(final_roots, root);
670 semantic_analysis(env, root, errors);
671 if (errors->n != 0) {
672 array_free(final_roots);
673 return (Program){0};
674 }
675 }
676 array_free(roots);
677 roots = final_roots;
678
679 // TODO: Check if primitive procedures have been given the right number of
680 // arguments.
681 // TODO: Type check basic expressions (e.g. arithmetic/numeric comparisons).
682 // We can't be sure when we have functions unless the return type is known.
683
684 return (Program){roots, env};
685}
686
687Environment *
688env_alloc(Environment *parent) {
689 Environment *env = malloc(sizeof(Environment));
690 env->locals = NULL;
691 env->local_values = NULL;
692 env->params = NULL;
693 array_init(env->locals, 0);
694 array_init(env->local_values, 0);
695 array_init(env->params, 0);
696 array_init(env->captured, 0);
697 env->parent = parent;
698 array_push(environments, env);
699 return env;
700}
701
702Object *
703object_alloc(Token tok, ObjectType type) {
704 Object *node = malloc(sizeof(Object));
705 node->line = tok.line;
706 node->col = tok.col;
707 node->type = type;
708 node->visited = false;
709 array_push(objects, node);
710 return node;
711}
712
713void
714object_free(Object *node) {
715 if (node == NULL) {
716 return;
717 }
718 if (IS_LAMBDA(node)) {
719 array_free(node->params);
720 array_free(node->body);
721 }
722 free(node);
723}
724
725void
726free_objects(void) {
727 if (objects != NULL) {
728 for (size_t i = 0; i < array_size(objects); i++) {
729 object_free(objects[i]);
730 }
731 array_free(objects);
732 }
733 if (environments != NULL) {
734 for (size_t i = 0; i < array_size(environments); i++) {
735 Environment *env = environments[i];
736 array_free(env->locals);
737 array_free(env->local_values);
738 array_free(env->params);
739 array_free(env->captured);
740 free(env);
741 }
742 array_free(environments);
743 }
744 array_free(roots);
745}
746
747void
748display_pair(Object *obj) {
749 object_display(obj->head);
750 if (obj->tail == NULL) {
751 return;
752 }
753 if (IS_PAIR(obj->tail)) {
754 printf(" ");
755 display_pair(obj->tail);
756 } else {
757 printf(" . ");
758 object_display(obj->tail);
759 }
760}
761
762void
763object_display(Object *obj) {
764 if (obj == NULL) {
765 printf("#{error}");
766 return;
767 }
768 switch (obj->type) {
769 case OBJ_TYPE_FIXNUM: {
770 printf("%zd", obj->fixnum);
771 } break;
772 case OBJ_TYPE_TRUE: {
773 printf("true");
774 } break;
775 case OBJ_TYPE_FALSE: {
776 printf("false");
777 } break;
778 case OBJ_TYPE_NIL: {
779 printf("()");
780 } break;
781 case OBJ_TYPE_STRING: {
782 printf("\"%.*s\"", (int)obj->text.n, obj->text.start);
783 } break;
784 case OBJ_TYPE_SYMBOL: {
785 printf(":%.*s", (int)obj->text.n, obj->text.start);
786 } break;
787 case OBJ_TYPE_PAIR: {
788 printf("(");
789 display_pair(obj);
790 printf(")");
791 } break;
792 case OBJ_TYPE_LAMBDA: {
793 printf("#{ lambda ( ");
794 for (size_t i = 0; i < array_size(obj->params); i++) {
795 object_display(obj->params[i]);
796 printf(" ");
797 }
798 printf(") ");
799 for (size_t i = 0; i < array_size(obj->body); i++) {
800 object_display(obj->body[i]);
801 printf(" ");
802 }
803 printf("}");
804 } break;
805 case OBJ_TYPE_IF: {
806 printf("#{ if ");
807 object_display(obj->condition);
808 printf(" ");
809 object_display(obj->expr_true);
810 if (obj->expr_false != NULL) {
811 printf(" ");
812 object_display(obj->expr_false);
813 }
814 printf(" }");
815 } break;
816 case OBJ_TYPE_DEF: {
817 printf("#{ def ");
818 object_display(obj->var_name);
819 printf(" ");
820 object_display(obj->var_expr);
821 printf(" }");
822 } break;
823 case OBJ_TYPE_SET: {
824 printf("#{ set! ");
825 object_display(obj->var_name);
826 printf(" ");
827 object_display(obj->var_expr);
828 printf(" }");
829 } break;
830 case OBJ_TYPE_BUILTIN: {
831 printf("%.*s", (int)obj->builtin_text.n, obj->builtin_text.start);
832 } break;
833 }
834 return;
835}
836
837bool
838object_equal(Object *a, Object *b) {
839 if (a == NULL || b == NULL || a->type != b->type) {
840 return false;
841 }
842 switch (a->type) {
843 case OBJ_TYPE_TRUE:
844 case OBJ_TYPE_FALSE: {
845 return true;
846 } break;
847 case OBJ_TYPE_FIXNUM: {
848 return a->fixnum == b->fixnum;
849 } break;
850 case OBJ_TYPE_SYMBOL:
851 case OBJ_TYPE_STRING: {
852 return sv_equal(&a->text, &b->text);
853 } break;
854 case OBJ_TYPE_BUILTIN: {
855 return a->builtin == b->builtin;
856 } break;
857 case OBJ_TYPE_PAIR: {
858 Object *a_head = a->head;
859 Object *b_head = b->head;
860 if (!object_equal(a_head, b_head)) {
861 return false;
862 }
863 Object *a_tail = a->tail;
864 Object *b_tail = b->tail;
865 while (a_tail != NULL && b_tail != NULL) {
866 if (!object_equal(a_head, b_head)) {
867 return false;
868 }
869 a_head = a_tail->head;
870 b_head = b_tail->head;
871 a_tail = a_tail->tail;
872 b_tail = b_tail->tail;
873 }
874 if (a_tail == b_tail && object_equal(a_head, b_head)) {
875 return true;
876 }
877 return false;
878 } break;
879 case OBJ_TYPE_LAMBDA: {
880 size_t n_params_a = array_size(a->params);
881 size_t n_params_b = array_size(b->params);
882 size_t n_expr_a = array_size(a->body);
883 size_t n_expr_b = array_size(b->body);
884 if (n_params_a != n_params_b || n_expr_a != n_expr_b) {
885 return false;
886 }
887 for (size_t i = 0; i < n_params_a; ++i) {
888 if (!object_equal(a->params[i], b->params[i])) {
889 return false;
890 }
891 }
892 for (size_t i = 0; i < n_expr_a; ++i) {
893 if (!object_equal(a->body[i], b->body[i])) {
894 return false;
895 }
896 }
897 return true;
898 } break;
899 default: break;
900 } 149 }
901 return false;
902} 150}
diff --git a/src/parser.h b/src/parser.h
index 2604be4..3e016d3 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,162 +3,50 @@
3 3
4#include "lexer.h" 4#include "lexer.h"
5 5
6typedef struct Environment {
7 struct Object **locals;
8 struct Object **local_values;
9 struct Object **params;
10 struct Object **captured;
11 struct Environment *parent;
12} Environment;
13
14typedef enum ObjectType {
15 OBJ_TYPE_NIL,
16 OBJ_TYPE_TRUE,
17 OBJ_TYPE_FALSE,
18 OBJ_TYPE_FIXNUM,
19 OBJ_TYPE_SYMBOL,
20 OBJ_TYPE_STRING,
21 OBJ_TYPE_PAIR,
22 OBJ_TYPE_LAMBDA,
23 OBJ_TYPE_IF,
24 OBJ_TYPE_DEF,
25 OBJ_TYPE_SET,
26 OBJ_TYPE_BUILTIN,
27} ObjectType;
28
29typedef enum Builtin {
30 BUILTIN_ADD,
31 BUILTIN_SUB,
32 BUILTIN_MUL,
33 BUILTIN_DIV,
34 BUILTIN_MOD,
35 BUILTIN_EQ,
36 BUILTIN_LT,
37 BUILTIN_GT,
38 BUILTIN_LE,
39 BUILTIN_GE,
40 BUILTIN_NOT,
41 BUILTIN_AND,
42 BUILTIN_OR,
43 BUILTIN_IS_NIL,
44 BUILTIN_IS_ZERO,
45 BUILTIN_IS_FIXNUM,
46 BUILTIN_IS_BOOL,
47 BUILTIN_PRINT,
48 BUILTIN_CONS,
49 BUILTIN_CAR,
50 BUILTIN_CDR,
51} Builtin;
52
53typedef struct Object {
54 ObjectType type;
55 union {
56 // OBJ_TYPE_FIXNUM
57 ssize_t fixnum;
58
59 // OBJ_TYPE_STRING
60 // OBJ_TYPE_SYMBOL
61 StringView text;
62
63 // OBJ_TYPE_PAIR
64 struct {
65 struct Object *head;
66 struct Object *tail;
67 size_t n_elems;
68 };
69
70 // OBJ_TYPE_LAMBDA
71 struct {
72 struct Object **params;
73 struct Object **body;
74 Environment *env;
75 };
76
77 // OBJ_TYPE_IF
78 struct {
79 struct Object *condition;
80 struct Object *expr_true;
81 struct Object *expr_false;
82 };
83
84 // OBJ_TYPE_DEF
85 // OBJ_TYPE_SET
86 struct {
87 struct Object *var_name;
88 struct Object *var_expr;
89 };
90
91 // OBJ_TYPE_BUILTIN
92 struct {
93 Builtin builtin;
94 StringView builtin_text;
95 };
96 };
97
98 bool visited;
99 size_t line;
100 size_t col;
101} Object;
102
103typedef struct Parser { 6typedef struct Parser {
104 Token *tokens; 7 Token *tokens;
105 size_t current; 8 size_t current;
106} Parser; 9} Parser;
107 10
108typedef Object* Root; 11typedef enum NodeType {
109 12 NODE_ERR,
110typedef struct Program { 13 // NODE_FUNCALL,
111 Root *roots; 14 // NODE_U64,
112 Environment *env; 15 // NODE_U32,
113} Program; 16 // NODE_U16,
114 17 // NODE_U8,
115// Token scanner. 18 // NODE_S64,
116Token next_token(Parser *parser); 19 // NODE_S32,
117Token previous_token(Parser *parser); 20 // NODE_S16,
118Token rewind_token(Parser *parser); 21 // NODE_S8,
119Token peek_token(const Parser *parser); 22 NODE_NUMBER,
120bool has_next_token(const Parser *parser); 23 NODE_STRING,
121 24} NodeType;
122// Parsing operations. 25
123Object * parse_tree(Parser *parser, Errors *errors); 26typedef struct Node {
124Object * parse_symbol(Token tok); 27 NodeType type;
125Object * parse_string(Token tok);
126Object * parse_bool(Token tok);
127Object * parse_fixnum(Token tok);
128Object * parse_list(Parser *parser, Errors *errors);
129Object * parse_lambda(Parser *parser, Errors *errors);
130Object * parse_if(Parser *parser, Errors *errors);
131Object * parse_var(Parser *parser, Errors *errors);
132Program parse(Token *tokens, Errors *errors);
133 28
134// Object operations. 29 union {
135void object_display(Object *obj); 30 // Integer numbers.
136bool object_equal(Object *a, Object *b); 31 // u64 as_u64;
137 32 // u32 as_u32;
138// Manage resources. 33 // u16 as_u16;
139Environment * env_alloc(Environment *parent); 34 // u8 as_u8;
140Object * object_alloc(Token tok, ObjectType type); 35 // s64 as_s64;
141void object_free(Object *node); 36 // s32 as_s32;
142void free_objects(void); 37 // s16 as_s16;
143 38 // s8 as_s8;
144// 39 s64 number;
145// Helper macros. 40
146// 41 // String.
147 42 StringView string;
148// Type checking. 43 // struct {
149#define IS_NIL(VAL) ((VAL)->type == OBJ_TYPE_NIL) 44 // u8 *str;
150#define IS_TRUE(VAL) ((VAL)->type != OBJ_TYPE_FALSE) 45 // u64 n;
151#define IS_FALSE(VAL) ((VAL)->type == OBJ_TYPE_FALSE) 46 // } as_str;
152#define IS_BOOL(VAL) \ 47 };
153 (((VAL)->type == OBJ_TYPE_FALSE) || ((VAL)->type == OBJ_TYPE_TRUE)) 48} Node;
154#define IS_FIXNUM(VAL) ((VAL)->type == OBJ_TYPE_FIXNUM)
155#define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING)
156#define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL)
157#define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR)
158#define IS_LAMBDA(VAL) ((VAL)->type == OBJ_TYPE_LAMBDA)
159#define IS_BUILTIN(VAL) ((VAL)->type == OBJ_TYPE_BUILTIN)
160 49
161// Debug. 50void parse(Token *tokens);
162#define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n");
163 51
164#endif // BDL_PARSER_H 52#endif // BDL_PARSER_H