diff options
author | Bad Diode <bd@badd10de.dev> | 2022-02-12 19:06:09 +0100 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2022-02-12 19:06:09 +0100 |
commit | fa32ad3224b3e362e5f79eee8785334f4bebdbc8 (patch) | |
tree | 4c5cb46baa8dc010921d755f157d6e23db9ce5d0 /src | |
parent | c4765a539ee01625dd310a02f0be16ec9a64e2e4 (diff) | |
download | bdl-fa32ad3224b3e362e5f79eee8785334f4bebdbc8.tar.gz bdl-fa32ad3224b3e362e5f79eee8785334f4bebdbc8.zip |
Add boilerplate for parser
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer.c | 62 | ||||
-rw-r--r-- | src/main.c | 11 | ||||
-rw-r--r-- | src/parser.c | 968 | ||||
-rw-r--r-- | src/parser.h | 188 |
4 files changed, 154 insertions, 1075 deletions
diff --git a/src/lexer.c b/src/lexer.c index 5175b1c..f63ff4f 100644 --- a/src/lexer.c +++ b/src/lexer.c | |||
@@ -141,67 +141,6 @@ is_delimiter(char c) { | |||
141 | return false; | 141 | return false; |
142 | } | 142 | } |
143 | 143 | ||
144 | size_t | ||
145 | scan_number_token(Scanner *scanner) { | ||
146 | // TODO: This looks like more a parsing problem than lexer, | ||
147 | // consider moving it there. If starts with `-` and there is no | ||
148 | // delimiter after, or if it starts with a number, it is | ||
149 | // TOKEN_NUMBER. | ||
150 | char first = scan_next(scanner); | ||
151 | char second = scan_peek(scanner); | ||
152 | size_t n = 1; | ||
153 | if (first == '0' && !is_delimiter(second)) { | ||
154 | if (second == 'x') { | ||
155 | // Hex constant. | ||
156 | scan_next(scanner); | ||
157 | n++; | ||
158 | if (is_delimiter(scan_peek(scanner))) { | ||
159 | return 0; | ||
160 | } | ||
161 | while (!is_delimiter(scan_peek(scanner))) { | ||
162 | char c = scan_next(scanner); | ||
163 | if (!(c >= '0' && c <= '9') && | ||
164 | !(c >= 'a' && c <= 'f') && | ||
165 | !(c >= 'A' && c <= 'F')) { | ||
166 | return 0; | ||
167 | } | ||
168 | n++; | ||
169 | } | ||
170 | return n; | ||
171 | } else if (second == 'b') { | ||
172 | // Binary constant. | ||
173 | scan_next(scanner); | ||
174 | n++; | ||
175 | if (is_delimiter(scan_peek(scanner))) { | ||
176 | return 0; | ||
177 | } | ||
178 | while (!is_delimiter(scan_peek(scanner))) { | ||
179 | char c = scan_next(scanner); | ||
180 | if (!(c == '0' || c == '1')) { | ||
181 | return 0; | ||
182 | } | ||
183 | n++; | ||
184 | } | ||
185 | } | ||
186 | } | ||
187 | |||
188 | // Decimal number or floating point. | ||
189 | bool has_dot = false; | ||
190 | while (!is_delimiter(scan_peek(scanner))) { | ||
191 | char c = scan_next(scanner); | ||
192 | if (c == '.') { | ||
193 | if (has_dot) { | ||
194 | return 0; | ||
195 | } | ||
196 | has_dot = true; | ||
197 | } else if (!(c >= '0' && c <= '9')) { | ||
198 | return 0; | ||
199 | } | ||
200 | n++; | ||
201 | } | ||
202 | return n; | ||
203 | } | ||
204 | |||
205 | TokenType | 144 | TokenType |
206 | find_token_type(const StringView value) { | 145 | find_token_type(const StringView value) { |
207 | for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { | 146 | for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { |
@@ -285,6 +224,7 @@ tokenize(const StringView *sv) { | |||
285 | } | 224 | } |
286 | size_t n = 1; | 225 | size_t n = 1; |
287 | bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); | 226 | bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); |
227 | is_number = c == '+' && !is_delimiter(scan_peek(&scanner)); | ||
288 | is_number = is_number || (c >= '0' && c <= '9'); | 228 | is_number = is_number || (c >= '0' && c <= '9'); |
289 | if (is_number) { | 229 | if (is_number) { |
290 | while (!is_delimiter(scan_peek(&scanner))) { | 230 | while (!is_delimiter(scan_peek(&scanner))) { |
@@ -7,7 +7,7 @@ | |||
7 | #include "string_view.c" | 7 | #include "string_view.c" |
8 | #include "errors.c" | 8 | #include "errors.c" |
9 | #include "lexer.c" | 9 | #include "lexer.c" |
10 | // #include "parser.c" | 10 | #include "parser.c" |
11 | // #include "ir.h" | 11 | // #include "ir.h" |
12 | // #include "compiler.h" | 12 | // #include "compiler.h" |
13 | 13 | ||
@@ -25,11 +25,14 @@ void | |||
25 | process_source(const StringView *source, const char *file_name) { | 25 | process_source(const StringView *source, const char *file_name) { |
26 | // Read tokens. | 26 | // Read tokens. |
27 | Token *tokens = tokenize(source); | 27 | Token *tokens = tokenize(source); |
28 | print_tokens(tokens); | 28 | // print_tokens(tokens); |
29 | check_errors(file_name); | ||
30 | |||
31 | // Parser. | ||
32 | parse(tokens); | ||
33 | // print_program(program); | ||
29 | check_errors(file_name); | 34 | check_errors(file_name); |
30 | 35 | ||
31 | // // Parser. | ||
32 | // Program program = parse(tokens, &errors); | ||
33 | // if (errors.n != 0) { | 36 | // if (errors.n != 0) { |
34 | // report_errors(&errors, file_name); | 37 | // report_errors(&errors, file_name); |
35 | // free_objects(); | 38 | // free_objects(); |
diff --git a/src/parser.c b/src/parser.c index a5e2b42..dfc3e56 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -1,902 +1,150 @@ | |||
1 | #include "parser.h" | 1 | #include "parser.h" |
2 | #include "darray.h" | 2 | #include "darray.h" |
3 | 3 | ||
4 | static Object **objects = NULL; | ||
5 | static Root *roots = NULL; | ||
6 | static Environment **environments = NULL; | ||
7 | |||
8 | // Builtin procedures. | ||
9 | static Object builtins[] = { | ||
10 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_ADD, .builtin_text = STRING("+") }, | ||
11 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_SUB, .builtin_text = STRING("-") }, | ||
12 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MUL, .builtin_text = STRING("*") }, | ||
13 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_DIV, .builtin_text = STRING("/") }, | ||
14 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_MOD, .builtin_text = STRING("%") }, | ||
15 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_EQ, .builtin_text = STRING("=") }, | ||
16 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LT, .builtin_text = STRING("<") }, | ||
17 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GT, .builtin_text = STRING(">") }, | ||
18 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_LE, .builtin_text = STRING("<=") }, | ||
19 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_GE, .builtin_text = STRING(">=") }, | ||
20 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_NOT, .builtin_text = STRING("not") }, | ||
21 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_AND, .builtin_text = STRING("and") }, | ||
22 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_OR, .builtin_text = STRING("or") }, | ||
23 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_NIL, .builtin_text = STRING("nil?") }, | ||
24 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_ZERO, .builtin_text = STRING("zero?") }, | ||
25 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_FIXNUM, .builtin_text = STRING("fixnum?") }, | ||
26 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_IS_BOOL, .builtin_text = STRING("bool?") }, | ||
27 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_PRINT, .builtin_text = STRING("print") }, | ||
28 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CONS, .builtin_text = STRING("cons") }, | ||
29 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CAR, .builtin_text = STRING("car") }, | ||
30 | { .type = OBJ_TYPE_BUILTIN, .builtin = BUILTIN_CDR, .builtin_text = STRING("cdr") }, | ||
31 | }; | ||
32 | |||
33 | // Static singleton objects. | ||
34 | static Object obj_nil = { .type = OBJ_TYPE_NIL }; | ||
35 | static Object obj_true = { .type = OBJ_TYPE_TRUE }; | ||
36 | static Object obj_false = { .type = OBJ_TYPE_FALSE }; | ||
37 | |||
38 | Token | ||
39 | peek_token(const Parser *parser) { | ||
40 | if (parser->current >= array_size(parser->tokens)) { | ||
41 | return parser->tokens[array_size(parser->tokens) - 1]; | ||
42 | } | ||
43 | return parser->tokens[parser->current]; | ||
44 | } | ||
45 | |||
46 | Token | 4 | Token |
47 | next_token(Parser *parser) { | 5 | next_token(Parser *parser) { |
48 | if (parser->current >= array_size(parser->tokens)) { | ||
49 | return parser->tokens[array_size(parser->tokens) - 1]; | ||
50 | } | ||
51 | return parser->tokens[parser->current++]; | 6 | return parser->tokens[parser->current++]; |
52 | } | 7 | } |
53 | 8 | ||
54 | Token | 9 | Token |
55 | previous_token(Parser *parser) { | 10 | peek_token(Parser *parser) { |
56 | return parser->tokens[parser->current - 1]; | 11 | return parser->tokens[parser->current]; |
57 | } | ||
58 | |||
59 | Token | ||
60 | rewind_token(Parser *parser) { | ||
61 | return parser->tokens[--parser->current]; | ||
62 | } | 12 | } |
63 | 13 | ||
64 | bool | 14 | bool |
65 | has_next_token(const Parser *parser) { | 15 | has_next(Parser *parser) { |
66 | return parser->current < array_size(parser->tokens) | 16 | return parser->current < array_size(parser->tokens); |
67 | && peek_token(parser).type != TOKEN_EOF; | ||
68 | } | ||
69 | |||
70 | Object * | ||
71 | parse_fixnum(Token tok) { | ||
72 | ssize_t num = 0; | ||
73 | ssize_t sign = 1; | ||
74 | for (size_t i = 0; i < tok.value.n; i++) { | ||
75 | char c = tok.value.start[i]; | ||
76 | if (c == '-') { | ||
77 | sign = -1; | ||
78 | continue; | ||
79 | } | ||
80 | num = num * 10 + (c - '0'); | ||
81 | } | ||
82 | Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); | ||
83 | ret->fixnum = num * sign; | ||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | Object * | ||
88 | parse_bool(Token tok) { | ||
89 | if (tok.type == TOKEN_TRUE) { | ||
90 | return &obj_true; | ||
91 | } | ||
92 | return &obj_false; | ||
93 | } | ||
94 | |||
95 | Object * | ||
96 | parse_string(Token tok) { | ||
97 | Object *ret = object_alloc(tok, OBJ_TYPE_STRING); | ||
98 | ret->text = tok.value; | ||
99 | return ret; | ||
100 | } | ||
101 | |||
102 | Object * | ||
103 | parse_symbol(Token tok) { | ||
104 | // Check if symbol is a builtin procedure. | ||
105 | size_t n_builtins = sizeof(builtins) / sizeof(Object); | ||
106 | for (size_t i = 0; i < n_builtins; ++i) { | ||
107 | if (sv_equal(&tok.value, &builtins[i].builtin_text)) { | ||
108 | return &builtins[i]; | ||
109 | } | ||
110 | } | ||
111 | |||
112 | Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); | ||
113 | ret->text = tok.value; | ||
114 | return ret; | ||
115 | } | 17 | } |
116 | 18 | ||
117 | Object * | 19 | Node |
118 | parse_lambda(Parser *parser, Errors *errors) { | 20 | parse_number(Parser *parser) { |
119 | Token start = next_token(parser); | ||
120 | Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); | ||
121 | array_init(lambda->params, 0); | ||
122 | array_init(lambda->body, 0); | ||
123 | lambda->env = NULL; | ||
124 | |||
125 | // Parse parameters. | ||
126 | Token tok = next_token(parser); | 21 | Token tok = next_token(parser); |
127 | if (tok.type == TOKEN_LPAREN) { | 22 | // TODO: do the dance. |
128 | while (has_next_token(parser)) { | 23 | // if error: |
129 | Token tok = next_token(parser); | 24 | // return (Node){.type = NODE_ERR}; |
130 | if (tok.type == TOKEN_RPAREN) { | 25 | |
131 | break; | 26 | // size_t |
132 | } | 27 | // scan_number_token(Scanner *scanner) { |
133 | if (tok.type != TOKEN_SYMBOL) { | 28 | // // TODO: This looks like more a parsing problem than lexer, |
134 | error_push(errors, (Error){ | 29 | // // consider moving it there. If starts with `-` and there is no |
135 | .type = ERR_TYPE_PARSER, | 30 | // // delimiter after, or if it starts with a number, it is |
136 | .value = ERR_WRONG_ARG_TYPE, | 31 | // // TOKEN_NUMBER. |
137 | .line = tok.line, | 32 | // char first = scan_next(scanner); |
138 | .col = tok.col, | 33 | // char second = scan_peek(scanner); |
139 | }); | 34 | // size_t n = 1; |
140 | } | 35 | // if (first == '0' && !is_delimiter(second)) { |
141 | Object *symbol = parse_symbol(tok); | 36 | // if (second == 'x') { |
142 | array_push(lambda->params, symbol); | 37 | // // Hex constant. |
143 | } | 38 | // scan_next(scanner); |
144 | } else if (tok.type != TOKEN_NIL) { | 39 | // n++; |
145 | error_push(errors, (Error){ | 40 | // if (is_delimiter(scan_peek(scanner))) { |
146 | .type = ERR_TYPE_PARSER, | 41 | // return 0; |
147 | .value = ERR_WRONG_ARG_TYPE, | 42 | // } |
148 | .line = tok.line, | 43 | // while (!is_delimiter(scan_peek(scanner))) { |
149 | .col = tok.col, | 44 | // char c = scan_next(scanner); |
150 | }); | 45 | // if (!(c >= '0' && c <= '9') && |
151 | return NULL; | 46 | // !(c >= 'a' && c <= 'f') && |
152 | } | 47 | // !(c >= 'A' && c <= 'F')) { |
153 | 48 | // return 0; | |
154 | // Parse body. | 49 | // } |
155 | bool done = false; | 50 | // n++; |
156 | while (has_next_token(parser)) { | 51 | // } |
157 | Token tok = peek_token(parser); | 52 | // return n; |
158 | if (tok.type == TOKEN_RPAREN) { | 53 | // } else if (second == 'b') { |
159 | next_token(parser); | 54 | // // Binary constant. |
160 | done = true; | 55 | // scan_next(scanner); |
161 | break; | 56 | // n++; |
162 | } | 57 | // if (is_delimiter(scan_peek(scanner))) { |
163 | Object *expr = parse_tree(parser, errors); | 58 | // return 0; |
164 | array_push(lambda->body, expr); | 59 | // } |
165 | } | 60 | // while (!is_delimiter(scan_peek(scanner))) { |
166 | if (!done) { | 61 | // char c = scan_next(scanner); |
167 | error_push(errors, (Error){ | 62 | // if (!(c == '0' || c == '1')) { |
168 | .type = ERR_TYPE_PARSER, | 63 | // return 0; |
169 | .value = ERR_UNBALANCED_PAREN, | 64 | // } |
170 | .line = tok.line, | 65 | // n++; |
171 | .col = tok.col, | 66 | // } |
172 | }); | 67 | // } |
173 | return NULL; | 68 | // } |
174 | } | 69 | |
175 | if (array_size(lambda->body) == 0) { | 70 | // // Decimal number or floating point. |
176 | error_push(errors, (Error){ | 71 | // bool has_dot = false; |
177 | .type = ERR_TYPE_PARSER, | 72 | // while (!is_delimiter(scan_peek(scanner))) { |
178 | .value = ERR_NOT_ENOUGH_ARGS, | 73 | // char c = scan_next(scanner); |
179 | .line = start.line, | 74 | // if (c == '.') { |
180 | .col = start.col, | 75 | // if (has_dot) { |
181 | }); | 76 | // return 0; |
182 | return NULL; | 77 | // } |
183 | } | 78 | // has_dot = true; |
184 | return lambda; | 79 | // } else if (!(c >= '0' && c <= '9')) { |
185 | } | 80 | // return 0; |
186 | 81 | // } | |
187 | Object * | 82 | // n++; |
188 | parse_if(Parser *parser, Errors *errors) { | 83 | // } |
189 | Token start = next_token(parser); | 84 | // return n; |
190 | Object *ret = object_alloc(start, OBJ_TYPE_IF); | 85 | // } |
191 | 86 | return (Node){.type = NODE_NUMBER, .string = 53}; | |
192 | Token tok = peek_token(parser); | 87 | } |
193 | if (tok.type == TOKEN_RPAREN) { | 88 | |
194 | error_push(errors, (Error){ | 89 | Node |
195 | .type = ERR_TYPE_PARSER, | 90 | parse_next(Parser *parser) { |
196 | .value = ERR_NOT_ENOUGH_ARGS, | 91 | if (!has_next(parser)) { |
197 | .line = tok.line, | 92 | return; |
198 | .col = tok.col, | ||
199 | }); | ||
200 | return NULL; | ||
201 | } | ||
202 | ret->condition = parse_tree(parser, errors); | ||
203 | |||
204 | tok = peek_token(parser); | ||
205 | if (tok.type == TOKEN_RPAREN) { | ||
206 | error_push(errors, (Error){ | ||
207 | .type = ERR_TYPE_PARSER, | ||
208 | .value = ERR_NOT_ENOUGH_ARGS, | ||
209 | .line = tok.line, | ||
210 | .col = tok.col, | ||
211 | }); | ||
212 | return NULL; | ||
213 | } | ||
214 | ret->expr_true = parse_tree(parser, errors); | ||
215 | |||
216 | // Optional else expression. | ||
217 | tok = peek_token(parser); | ||
218 | if (tok.type == TOKEN_RPAREN) { | ||
219 | next_token(parser); | ||
220 | ret->expr_false = NULL; | ||
221 | return ret; | ||
222 | } | ||
223 | ret->expr_false = parse_tree(parser, errors); | ||
224 | |||
225 | tok = peek_token(parser); | ||
226 | if (tok.type == TOKEN_EOF) { | ||
227 | error_push(errors, (Error){ | ||
228 | .type = ERR_TYPE_PARSER, | ||
229 | .value = ERR_UNBALANCED_PAREN, | ||
230 | .line = tok.line, | ||
231 | .col = tok.col, | ||
232 | }); | ||
233 | return NULL; | ||
234 | } | ||
235 | if (tok.type != TOKEN_RPAREN) { | ||
236 | error_push(errors, (Error){ | ||
237 | .type = ERR_TYPE_PARSER, | ||
238 | .value = ERR_TOO_MANY_ARGS, | ||
239 | .line = tok.line, | ||
240 | .col = tok.col, | ||
241 | }); | ||
242 | return NULL; | ||
243 | } | ||
244 | next_token(parser); | ||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | Object * | ||
249 | parse_var(Parser *parser, Errors *errors) { | ||
250 | Token start = next_token(parser); | ||
251 | ObjectType type = start.type == TOKEN_DEF ? OBJ_TYPE_DEF : OBJ_TYPE_SET; | ||
252 | Object *ret = object_alloc(start, type); | ||
253 | |||
254 | // Variable name. | ||
255 | Token tok = peek_token(parser); | ||
256 | if (tok.type == TOKEN_RPAREN) { | ||
257 | error_push(errors, (Error){ | ||
258 | .type = ERR_TYPE_PARSER, | ||
259 | .value = ERR_NOT_ENOUGH_ARGS, | ||
260 | .line = tok.line, | ||
261 | .col = tok.col, | ||
262 | }); | ||
263 | return NULL; | ||
264 | } | ||
265 | if (tok.type != TOKEN_SYMBOL) { | ||
266 | error_push(errors, (Error){ | ||
267 | .type = ERR_TYPE_PARSER, | ||
268 | .value = ERR_WRONG_ARG_TYPE, | ||
269 | .line = tok.line, | ||
270 | .col = tok.col, | ||
271 | }); | ||
272 | return NULL; | ||
273 | } | ||
274 | ret->var_name = parse_tree(parser, errors); | ||
275 | |||
276 | // Variable value (expression). | ||
277 | tok = peek_token(parser); | ||
278 | if (tok.type == TOKEN_RPAREN) { | ||
279 | error_push(errors, (Error){ | ||
280 | .type = ERR_TYPE_PARSER, | ||
281 | .value = ERR_NOT_ENOUGH_ARGS, | ||
282 | .line = tok.line, | ||
283 | .col = tok.col, | ||
284 | }); | ||
285 | return NULL; | ||
286 | } | ||
287 | ret->var_expr = parse_tree(parser, errors); | ||
288 | |||
289 | tok = peek_token(parser); | ||
290 | if (tok.type == TOKEN_EOF) { | ||
291 | error_push(errors, (Error){ | ||
292 | .type = ERR_TYPE_PARSER, | ||
293 | .value = ERR_UNBALANCED_PAREN, | ||
294 | .line = tok.line, | ||
295 | .col = tok.col, | ||
296 | }); | ||
297 | return NULL; | ||
298 | } | ||
299 | if (tok.type != TOKEN_RPAREN) { | ||
300 | error_push(errors, (Error){ | ||
301 | .type = ERR_TYPE_PARSER, | ||
302 | .value = ERR_TOO_MANY_ARGS, | ||
303 | .line = tok.line, | ||
304 | .col = tok.col, | ||
305 | }); | ||
306 | return NULL; | ||
307 | } | ||
308 | next_token(parser); | ||
309 | return ret; | ||
310 | } | ||
311 | |||
312 | Object * | ||
313 | parse_fun(Parser *parser, Errors *errors) { | ||
314 | Token start = next_token(parser); | ||
315 | Object *ret = object_alloc(start, OBJ_TYPE_DEF); | ||
316 | |||
317 | // Variable name. | ||
318 | Token tok = peek_token(parser); | ||
319 | if (tok.type == TOKEN_RPAREN) { | ||
320 | error_push(errors, (Error){ | ||
321 | .type = ERR_TYPE_PARSER, | ||
322 | .value = ERR_NOT_ENOUGH_ARGS, | ||
323 | .line = tok.line, | ||
324 | .col = tok.col, | ||
325 | }); | ||
326 | return NULL; | ||
327 | } | ||
328 | if (tok.type != TOKEN_SYMBOL) { | ||
329 | error_push(errors, (Error){ | ||
330 | .type = ERR_TYPE_PARSER, | ||
331 | .value = ERR_WRONG_ARG_TYPE, | ||
332 | .line = tok.line, | ||
333 | .col = tok.col, | ||
334 | }); | ||
335 | return NULL; | ||
336 | } | ||
337 | ret->var_name = parse_tree(parser, errors); | ||
338 | |||
339 | // Variable value (expression). | ||
340 | rewind_token(parser); | ||
341 | ret->var_expr = parse_lambda(parser, errors); | ||
342 | return ret; | ||
343 | } | ||
344 | |||
345 | Object * | ||
346 | parse_list(Parser *parser, Errors *errors) { | ||
347 | if (errors->n != 0) { | ||
348 | return NULL; | ||
349 | } | 93 | } |
350 | 94 | ||
351 | Token tok = peek_token(parser); | 95 | Token tok = peek_token(parser); |
352 | switch (tok.type) { | 96 | switch (tok.type) { |
353 | case TOKEN_RPAREN: { | 97 | case TOKEN_NUMBER: { |
354 | error_push(errors, (Error){ | 98 | return parse_number(parser); |
355 | .type = ERR_TYPE_PARSER, | ||
356 | .value = ERR_UNBALANCED_PAREN, | ||
357 | .line = tok.line, | ||
358 | .col = tok.col, | ||
359 | }); | ||
360 | return NULL; | ||
361 | } break; | ||
362 | case TOKEN_LAMBDA: { return parse_lambda(parser, errors); } break; | ||
363 | case TOKEN_IF: { return parse_if(parser, errors); } break; | ||
364 | case TOKEN_DEF: { return parse_var(parser, errors); } break; | ||
365 | case TOKEN_SET: { return parse_var(parser, errors); } break; | ||
366 | case TOKEN_FUN: { return parse_fun(parser, errors); } break; | ||
367 | default: break; | ||
368 | } | ||
369 | |||
370 | Token start = previous_token(parser); | ||
371 | Object *root = object_alloc(start, OBJ_TYPE_PAIR); | ||
372 | root->head = NULL; | ||
373 | root->tail = NULL; | ||
374 | root->n_elems = 0; | ||
375 | Object *current = root; | ||
376 | bool first = true; | ||
377 | while (has_next_token(parser)) { | ||
378 | Token tok = peek_token(parser); | ||
379 | current->head = parse_tree(parser, errors); | ||
380 | if (errors->n != 0 || current->head == NULL) { | ||
381 | return NULL; | ||
382 | } | ||
383 | |||
384 | if (first) { | ||
385 | if (!IS_SYMBOL(current->head) && | ||
386 | !IS_LAMBDA(current->head) && | ||
387 | !IS_BUILTIN(current->head)) { | ||
388 | error_push(errors, (Error){ | ||
389 | .type = ERR_TYPE_PARSER, | ||
390 | .value = ERR_NOT_CALLABLE, | ||
391 | .line = tok.line, | ||
392 | .col = tok.col, | ||
393 | }); | ||
394 | return NULL; | ||
395 | } | ||
396 | first = false; | ||
397 | } | ||
398 | |||
399 | tok = peek_token(parser); | ||
400 | if (tok.type == TOKEN_RPAREN) { | ||
401 | next_token(parser); | ||
402 | return root; | ||
403 | } | ||
404 | |||
405 | Object *next = object_alloc(start, OBJ_TYPE_PAIR); | ||
406 | next->head = NULL; | ||
407 | next->tail = NULL; | ||
408 | current->tail = next; | ||
409 | current = current->tail; | ||
410 | root->n_elems++; | ||
411 | } | ||
412 | error_push(errors, (Error){ | ||
413 | .type = ERR_TYPE_PARSER, | ||
414 | .value = ERR_UNBALANCED_PAREN, | ||
415 | .line = start.line, | ||
416 | .col = start.col, | ||
417 | }); | ||
418 | return NULL; | ||
419 | } | ||
420 | |||
421 | Object * | ||
422 | parse_tree(Parser *parser, Errors *errors) { | ||
423 | Token tok = next_token(parser); | ||
424 | if (errors->n != 0) { | ||
425 | return NULL; | ||
426 | } | ||
427 | switch (tok.type) { | ||
428 | case TOKEN_FIXNUM: { | ||
429 | return parse_fixnum(tok); | ||
430 | } break; | ||
431 | case TOKEN_TRUE: | ||
432 | case TOKEN_FALSE: { | ||
433 | return parse_bool(tok); | ||
434 | } break; | ||
435 | case TOKEN_RPAREN: { | ||
436 | error_push(errors, (Error){ | ||
437 | .type = ERR_TYPE_PARSER, | ||
438 | .value = ERR_UNBALANCED_PAREN, | ||
439 | .line = tok.line, | ||
440 | .col = tok.col, | ||
441 | }); | ||
442 | return NULL; | ||
443 | } break; | ||
444 | case TOKEN_LPAREN: { | ||
445 | return parse_list(parser, errors); | ||
446 | } break; | 99 | } break; |
447 | case TOKEN_STRING: { | 100 | case TOKEN_STRING: { |
448 | return parse_string(tok); | 101 | // printf("STRING!\n"); |
449 | } break; | 102 | next_token(parser); // FIXME: <==== |
450 | case TOKEN_SYMBOL: { | 103 | return (Node){.type = NODE_STRING, .string = tok.value}; |
451 | return parse_symbol(tok); | 104 | // TODO: parse_string(parser); |
452 | } break; | 105 | } break; |
453 | case TOKEN_NIL: { | 106 | case TOKEN_LPAREN: { |
454 | return &obj_nil; | 107 | // printf("LPAREN OH MY!\n"); |
108 | // TODO: parse_list(parser); | ||
455 | } break; | 109 | } break; |
456 | default: { | 110 | default: { |
457 | break; | 111 | // printf("OH OHHHH\n"); |
112 | // ... | ||
458 | } break; | 113 | } break; |
459 | } | 114 | } |
460 | return NULL; | 115 | next_token(parser); // FIXME: <==== |
461 | } | 116 | // TODO: this should be an error |
462 | |||
463 | ssize_t | ||
464 | find_var_index(Object **vars, Object *symbol) { | ||
465 | for (size_t i = 0; i < array_size(vars); i++) { | ||
466 | if (object_equal(vars[i], symbol)) { | ||
467 | return i; | ||
468 | } | ||
469 | } | ||
470 | return -1; | ||
471 | } | ||
472 | |||
473 | Object * | ||
474 | symbol_in_env(Environment *env, Object *symbol) { | ||
475 | while (env != NULL) { | ||
476 | ssize_t idx = find_var_index(env->locals, symbol); | ||
477 | if (idx != -1) { | ||
478 | return env->local_values[idx]; | ||
479 | } | ||
480 | idx = find_var_index(env->params, symbol); | ||
481 | if (idx != -1) { | ||
482 | return env->params[idx]; | ||
483 | } | ||
484 | env = env->parent; | ||
485 | } | ||
486 | return NULL; | ||
487 | } | ||
488 | |||
489 | void | ||
490 | insert_local(Environment *env, Object *symbol, Object *value) { | ||
491 | ssize_t idx = find_var_index(env->locals, symbol); | ||
492 | if (idx != -1) { | ||
493 | env->local_values[idx] = value; | ||
494 | return; | ||
495 | } | ||
496 | array_push(env->locals, symbol); | ||
497 | array_push(env->local_values, value); | ||
498 | } | 117 | } |
499 | 118 | ||
500 | void | 119 | void |
501 | insert_params(Environment *env, Object *symbol) { | 120 | print_node(Node node) { |
502 | if (find_var_index(env->params, symbol) != -1) { | 121 | switch (node.type) { |
503 | return; | 122 | case NODE_NUMBER: { |
504 | } | 123 | printf("%ld\n", node.number); |
505 | array_push(env->params, symbol); | ||
506 | } | ||
507 | |||
508 | void | ||
509 | insert_captured(Environment *env, Object *symbol) { | ||
510 | if (find_var_index(env->captured, symbol) != -1) { | ||
511 | return; | ||
512 | } | ||
513 | array_push(env->captured, symbol); | ||
514 | } | ||
515 | |||
516 | void | ||
517 | semantic_analysis(Environment *env, Object *obj, Errors *errors) { | ||
518 | if (obj == NULL || obj->visited) { | ||
519 | return; | ||
520 | } | ||
521 | obj->visited = true; | ||
522 | switch (obj->type) { | ||
523 | case OBJ_TYPE_SYMBOL: { | ||
524 | Object *found = NULL; | ||
525 | |||
526 | Environment *cur_env = env; | ||
527 | while (cur_env != NULL) { | ||
528 | ssize_t idx = find_var_index(cur_env->locals, obj); | ||
529 | if (idx != -1) { | ||
530 | found = cur_env->local_values[idx]; | ||
531 | if (cur_env != env) { | ||
532 | insert_captured(env, obj); | ||
533 | } | ||
534 | break; | ||
535 | } | ||
536 | idx = find_var_index(cur_env->params, obj); | ||
537 | if (idx != -1) { | ||
538 | found = cur_env->params[idx]; | ||
539 | break; | ||
540 | } | ||
541 | cur_env = cur_env->parent; | ||
542 | } | ||
543 | |||
544 | // Check if symbol is in other environments. | ||
545 | if (found == NULL) { | ||
546 | error_push(errors, (Error){ | ||
547 | .type = ERR_TYPE_PARSER, | ||
548 | .value = ERR_SYMBOL_NOT_FOUND, | ||
549 | .line = obj->line, | ||
550 | .col = obj->col, | ||
551 | }); | ||
552 | return; | ||
553 | } | ||
554 | semantic_analysis(env, found, errors); | ||
555 | } break; | ||
556 | case OBJ_TYPE_DEF: { | ||
557 | insert_local(env, obj->var_name, obj->var_expr); | ||
558 | semantic_analysis(env, obj->var_expr, errors); | ||
559 | } break; | ||
560 | case OBJ_TYPE_SET: { | ||
561 | semantic_analysis(env, obj->var_name, errors); | ||
562 | semantic_analysis(env, obj->var_expr, errors); | ||
563 | } break; | ||
564 | case OBJ_TYPE_IF: { | ||
565 | semantic_analysis(env, obj->condition, errors); | ||
566 | semantic_analysis(env, obj->expr_true, errors); | ||
567 | semantic_analysis(env, obj->expr_false, errors); | ||
568 | } break; | 124 | } break; |
569 | case OBJ_TYPE_PAIR: { | 125 | case NODE_STRING: { |
570 | Object *head = obj->head; | 126 | sv_write(&node.string); |
571 | if (IS_SYMBOL(head)) { | 127 | printf("\n"); |
572 | head = symbol_in_env(env, head); | ||
573 | if (head == NULL) { | ||
574 | error_push(errors, (Error){ | ||
575 | .type = ERR_TYPE_PARSER, | ||
576 | .value = ERR_SYMBOL_NOT_FOUND, | ||
577 | .line = obj->head->line, | ||
578 | .col = obj->head->col, | ||
579 | }); | ||
580 | return; | ||
581 | } | ||
582 | } | ||
583 | if (IS_LAMBDA(head)) { | ||
584 | if (obj->n_elems != array_size(head->params)) { | ||
585 | error_push(errors, (Error){ | ||
586 | .type = ERR_TYPE_PARSER, | ||
587 | .value = ERR_NOT_ENOUGH_ARGS, | ||
588 | .line = obj->line, | ||
589 | .col = obj->col | ||
590 | }); | ||
591 | return; | ||
592 | } | ||
593 | } | ||
594 | semantic_analysis(env, obj->head, errors); | ||
595 | semantic_analysis(env, obj->tail, errors); | ||
596 | } break; | 128 | } break; |
597 | case OBJ_TYPE_LAMBDA: { | 129 | default: { |
598 | // Initialize scope for this lambda. | 130 | // ... |
599 | Environment *new_env = env_alloc(env); | ||
600 | obj->env = new_env; | ||
601 | for (size_t i = 0; i < array_size(obj->params); i++) { | ||
602 | insert_params(obj->env, obj->params[i]); | ||
603 | } | ||
604 | // Used for removing unnecessary statements. | ||
605 | Object **new_body = NULL; | ||
606 | array_init(new_body, 0); | ||
607 | for (size_t i = 0; i < array_size(obj->body); i++) { | ||
608 | Object *expr = obj->body[i]; | ||
609 | if (i != array_size(obj->body) - 1) { | ||
610 | if (IS_FIXNUM(expr) || | ||
611 | IS_STRING(expr) || | ||
612 | IS_SYMBOL(expr) || | ||
613 | IS_LAMBDA(expr) || | ||
614 | IS_BOOL(expr) || | ||
615 | IS_NIL(expr)) { | ||
616 | continue; | ||
617 | } | ||
618 | } | ||
619 | semantic_analysis(obj->env, expr, errors); | ||
620 | array_push(new_body, expr); | ||
621 | } | ||
622 | array_free(obj->body); | ||
623 | obj->body = new_body; | ||
624 | } break; | 131 | } break; |
625 | default: break; | ||
626 | } | 132 | } |
627 | } | 133 | } |
628 | 134 | ||
629 | Program | 135 | void |
630 | parse(Token *tokens, Errors *errors) { | 136 | parse(Token *tokens) { |
631 | array_init(roots, 0); | ||
632 | array_init(objects, 0); | ||
633 | array_init(environments, 0); | ||
634 | Parser parser = { | 137 | Parser parser = { |
635 | .tokens = tokens, | 138 | .tokens = tokens, |
636 | .current = 0, | 139 | .current = 0, |
637 | }; | 140 | }; |
638 | 141 | while (has_next(&parser)) { | |
639 | // Build initial ASTs. This also ensures the core grammar is correct. | 142 | Node node = parse_next(&parser); |
640 | while (has_next_token(&parser)) { | 143 | if (node.type == NODE_ERR) { |
641 | Object *root = parse_tree(&parser, errors); | 144 | return; |
642 | if (errors->n != 0) { | ||
643 | return (Program){0}; | ||
644 | } | 145 | } |
645 | array_push(roots, root); | 146 | print_node(node); |
646 | } | 147 | // Token tok = next_token(&parser); |
647 | 148 | // print_token(tok); | |
648 | // Prepare global environment. | ||
649 | Environment *env = env_alloc(NULL); | ||
650 | |||
651 | // Perform semantic analysis: | ||
652 | // 1. Populate symbol tables and ensure symbols are in scope when used. | ||
653 | // 2. Removing unnecessary expressions. | ||
654 | // 3. Verify number of arguments is correct in function calls. | ||
655 | Root *final_roots = NULL; | ||
656 | array_init(final_roots, 0); | ||
657 | for (size_t i = 0; i < array_size(roots); i++) { | ||
658 | Object *root = roots[i]; | ||
659 | if (i != array_size(roots) - 1) { | ||
660 | if (IS_FIXNUM(root) || | ||
661 | IS_STRING(root) || | ||
662 | IS_SYMBOL(root) || | ||
663 | IS_LAMBDA(root) || | ||
664 | IS_BOOL(root) || | ||
665 | IS_NIL(root)) { | ||
666 | continue; | ||
667 | } | ||
668 | } | ||
669 | array_push(final_roots, root); | ||
670 | semantic_analysis(env, root, errors); | ||
671 | if (errors->n != 0) { | ||
672 | array_free(final_roots); | ||
673 | return (Program){0}; | ||
674 | } | ||
675 | } | ||
676 | array_free(roots); | ||
677 | roots = final_roots; | ||
678 | |||
679 | // TODO: Check if primitive procedures have been given the right number of | ||
680 | // arguments. | ||
681 | // TODO: Type check basic expressions (e.g. arithmetic/numeric comparisons). | ||
682 | // We can't be sure when we have functions unless the return type is known. | ||
683 | |||
684 | return (Program){roots, env}; | ||
685 | } | ||
686 | |||
687 | Environment * | ||
688 | env_alloc(Environment *parent) { | ||
689 | Environment *env = malloc(sizeof(Environment)); | ||
690 | env->locals = NULL; | ||
691 | env->local_values = NULL; | ||
692 | env->params = NULL; | ||
693 | array_init(env->locals, 0); | ||
694 | array_init(env->local_values, 0); | ||
695 | array_init(env->params, 0); | ||
696 | array_init(env->captured, 0); | ||
697 | env->parent = parent; | ||
698 | array_push(environments, env); | ||
699 | return env; | ||
700 | } | ||
701 | |||
702 | Object * | ||
703 | object_alloc(Token tok, ObjectType type) { | ||
704 | Object *node = malloc(sizeof(Object)); | ||
705 | node->line = tok.line; | ||
706 | node->col = tok.col; | ||
707 | node->type = type; | ||
708 | node->visited = false; | ||
709 | array_push(objects, node); | ||
710 | return node; | ||
711 | } | ||
712 | |||
713 | void | ||
714 | object_free(Object *node) { | ||
715 | if (node == NULL) { | ||
716 | return; | ||
717 | } | ||
718 | if (IS_LAMBDA(node)) { | ||
719 | array_free(node->params); | ||
720 | array_free(node->body); | ||
721 | } | ||
722 | free(node); | ||
723 | } | ||
724 | |||
725 | void | ||
726 | free_objects(void) { | ||
727 | if (objects != NULL) { | ||
728 | for (size_t i = 0; i < array_size(objects); i++) { | ||
729 | object_free(objects[i]); | ||
730 | } | ||
731 | array_free(objects); | ||
732 | } | ||
733 | if (environments != NULL) { | ||
734 | for (size_t i = 0; i < array_size(environments); i++) { | ||
735 | Environment *env = environments[i]; | ||
736 | array_free(env->locals); | ||
737 | array_free(env->local_values); | ||
738 | array_free(env->params); | ||
739 | array_free(env->captured); | ||
740 | free(env); | ||
741 | } | ||
742 | array_free(environments); | ||
743 | } | ||
744 | array_free(roots); | ||
745 | } | ||
746 | |||
747 | void | ||
748 | display_pair(Object *obj) { | ||
749 | object_display(obj->head); | ||
750 | if (obj->tail == NULL) { | ||
751 | return; | ||
752 | } | ||
753 | if (IS_PAIR(obj->tail)) { | ||
754 | printf(" "); | ||
755 | display_pair(obj->tail); | ||
756 | } else { | ||
757 | printf(" . "); | ||
758 | object_display(obj->tail); | ||
759 | } | ||
760 | } | ||
761 | |||
762 | void | ||
763 | object_display(Object *obj) { | ||
764 | if (obj == NULL) { | ||
765 | printf("#{error}"); | ||
766 | return; | ||
767 | } | ||
768 | switch (obj->type) { | ||
769 | case OBJ_TYPE_FIXNUM: { | ||
770 | printf("%zd", obj->fixnum); | ||
771 | } break; | ||
772 | case OBJ_TYPE_TRUE: { | ||
773 | printf("true"); | ||
774 | } break; | ||
775 | case OBJ_TYPE_FALSE: { | ||
776 | printf("false"); | ||
777 | } break; | ||
778 | case OBJ_TYPE_NIL: { | ||
779 | printf("()"); | ||
780 | } break; | ||
781 | case OBJ_TYPE_STRING: { | ||
782 | printf("\"%.*s\"", (int)obj->text.n, obj->text.start); | ||
783 | } break; | ||
784 | case OBJ_TYPE_SYMBOL: { | ||
785 | printf(":%.*s", (int)obj->text.n, obj->text.start); | ||
786 | } break; | ||
787 | case OBJ_TYPE_PAIR: { | ||
788 | printf("("); | ||
789 | display_pair(obj); | ||
790 | printf(")"); | ||
791 | } break; | ||
792 | case OBJ_TYPE_LAMBDA: { | ||
793 | printf("#{ lambda ( "); | ||
794 | for (size_t i = 0; i < array_size(obj->params); i++) { | ||
795 | object_display(obj->params[i]); | ||
796 | printf(" "); | ||
797 | } | ||
798 | printf(") "); | ||
799 | for (size_t i = 0; i < array_size(obj->body); i++) { | ||
800 | object_display(obj->body[i]); | ||
801 | printf(" "); | ||
802 | } | ||
803 | printf("}"); | ||
804 | } break; | ||
805 | case OBJ_TYPE_IF: { | ||
806 | printf("#{ if "); | ||
807 | object_display(obj->condition); | ||
808 | printf(" "); | ||
809 | object_display(obj->expr_true); | ||
810 | if (obj->expr_false != NULL) { | ||
811 | printf(" "); | ||
812 | object_display(obj->expr_false); | ||
813 | } | ||
814 | printf(" }"); | ||
815 | } break; | ||
816 | case OBJ_TYPE_DEF: { | ||
817 | printf("#{ def "); | ||
818 | object_display(obj->var_name); | ||
819 | printf(" "); | ||
820 | object_display(obj->var_expr); | ||
821 | printf(" }"); | ||
822 | } break; | ||
823 | case OBJ_TYPE_SET: { | ||
824 | printf("#{ set! "); | ||
825 | object_display(obj->var_name); | ||
826 | printf(" "); | ||
827 | object_display(obj->var_expr); | ||
828 | printf(" }"); | ||
829 | } break; | ||
830 | case OBJ_TYPE_BUILTIN: { | ||
831 | printf("%.*s", (int)obj->builtin_text.n, obj->builtin_text.start); | ||
832 | } break; | ||
833 | } | ||
834 | return; | ||
835 | } | ||
836 | |||
837 | bool | ||
838 | object_equal(Object *a, Object *b) { | ||
839 | if (a == NULL || b == NULL || a->type != b->type) { | ||
840 | return false; | ||
841 | } | ||
842 | switch (a->type) { | ||
843 | case OBJ_TYPE_TRUE: | ||
844 | case OBJ_TYPE_FALSE: { | ||
845 | return true; | ||
846 | } break; | ||
847 | case OBJ_TYPE_FIXNUM: { | ||
848 | return a->fixnum == b->fixnum; | ||
849 | } break; | ||
850 | case OBJ_TYPE_SYMBOL: | ||
851 | case OBJ_TYPE_STRING: { | ||
852 | return sv_equal(&a->text, &b->text); | ||
853 | } break; | ||
854 | case OBJ_TYPE_BUILTIN: { | ||
855 | return a->builtin == b->builtin; | ||
856 | } break; | ||
857 | case OBJ_TYPE_PAIR: { | ||
858 | Object *a_head = a->head; | ||
859 | Object *b_head = b->head; | ||
860 | if (!object_equal(a_head, b_head)) { | ||
861 | return false; | ||
862 | } | ||
863 | Object *a_tail = a->tail; | ||
864 | Object *b_tail = b->tail; | ||
865 | while (a_tail != NULL && b_tail != NULL) { | ||
866 | if (!object_equal(a_head, b_head)) { | ||
867 | return false; | ||
868 | } | ||
869 | a_head = a_tail->head; | ||
870 | b_head = b_tail->head; | ||
871 | a_tail = a_tail->tail; | ||
872 | b_tail = b_tail->tail; | ||
873 | } | ||
874 | if (a_tail == b_tail && object_equal(a_head, b_head)) { | ||
875 | return true; | ||
876 | } | ||
877 | return false; | ||
878 | } break; | ||
879 | case OBJ_TYPE_LAMBDA: { | ||
880 | size_t n_params_a = array_size(a->params); | ||
881 | size_t n_params_b = array_size(b->params); | ||
882 | size_t n_expr_a = array_size(a->body); | ||
883 | size_t n_expr_b = array_size(b->body); | ||
884 | if (n_params_a != n_params_b || n_expr_a != n_expr_b) { | ||
885 | return false; | ||
886 | } | ||
887 | for (size_t i = 0; i < n_params_a; ++i) { | ||
888 | if (!object_equal(a->params[i], b->params[i])) { | ||
889 | return false; | ||
890 | } | ||
891 | } | ||
892 | for (size_t i = 0; i < n_expr_a; ++i) { | ||
893 | if (!object_equal(a->body[i], b->body[i])) { | ||
894 | return false; | ||
895 | } | ||
896 | } | ||
897 | return true; | ||
898 | } break; | ||
899 | default: break; | ||
900 | } | 149 | } |
901 | return false; | ||
902 | } | 150 | } |
diff --git a/src/parser.h b/src/parser.h index 2604be4..3e016d3 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -3,162 +3,50 @@ | |||
3 | 3 | ||
4 | #include "lexer.h" | 4 | #include "lexer.h" |
5 | 5 | ||
6 | typedef struct Environment { | ||
7 | struct Object **locals; | ||
8 | struct Object **local_values; | ||
9 | struct Object **params; | ||
10 | struct Object **captured; | ||
11 | struct Environment *parent; | ||
12 | } Environment; | ||
13 | |||
14 | typedef enum ObjectType { | ||
15 | OBJ_TYPE_NIL, | ||
16 | OBJ_TYPE_TRUE, | ||
17 | OBJ_TYPE_FALSE, | ||
18 | OBJ_TYPE_FIXNUM, | ||
19 | OBJ_TYPE_SYMBOL, | ||
20 | OBJ_TYPE_STRING, | ||
21 | OBJ_TYPE_PAIR, | ||
22 | OBJ_TYPE_LAMBDA, | ||
23 | OBJ_TYPE_IF, | ||
24 | OBJ_TYPE_DEF, | ||
25 | OBJ_TYPE_SET, | ||
26 | OBJ_TYPE_BUILTIN, | ||
27 | } ObjectType; | ||
28 | |||
29 | typedef enum Builtin { | ||
30 | BUILTIN_ADD, | ||
31 | BUILTIN_SUB, | ||
32 | BUILTIN_MUL, | ||
33 | BUILTIN_DIV, | ||
34 | BUILTIN_MOD, | ||
35 | BUILTIN_EQ, | ||
36 | BUILTIN_LT, | ||
37 | BUILTIN_GT, | ||
38 | BUILTIN_LE, | ||
39 | BUILTIN_GE, | ||
40 | BUILTIN_NOT, | ||
41 | BUILTIN_AND, | ||
42 | BUILTIN_OR, | ||
43 | BUILTIN_IS_NIL, | ||
44 | BUILTIN_IS_ZERO, | ||
45 | BUILTIN_IS_FIXNUM, | ||
46 | BUILTIN_IS_BOOL, | ||
47 | BUILTIN_PRINT, | ||
48 | BUILTIN_CONS, | ||
49 | BUILTIN_CAR, | ||
50 | BUILTIN_CDR, | ||
51 | } Builtin; | ||
52 | |||
53 | typedef struct Object { | ||
54 | ObjectType type; | ||
55 | union { | ||
56 | // OBJ_TYPE_FIXNUM | ||
57 | ssize_t fixnum; | ||
58 | |||
59 | // OBJ_TYPE_STRING | ||
60 | // OBJ_TYPE_SYMBOL | ||
61 | StringView text; | ||
62 | |||
63 | // OBJ_TYPE_PAIR | ||
64 | struct { | ||
65 | struct Object *head; | ||
66 | struct Object *tail; | ||
67 | size_t n_elems; | ||
68 | }; | ||
69 | |||
70 | // OBJ_TYPE_LAMBDA | ||
71 | struct { | ||
72 | struct Object **params; | ||
73 | struct Object **body; | ||
74 | Environment *env; | ||
75 | }; | ||
76 | |||
77 | // OBJ_TYPE_IF | ||
78 | struct { | ||
79 | struct Object *condition; | ||
80 | struct Object *expr_true; | ||
81 | struct Object *expr_false; | ||
82 | }; | ||
83 | |||
84 | // OBJ_TYPE_DEF | ||
85 | // OBJ_TYPE_SET | ||
86 | struct { | ||
87 | struct Object *var_name; | ||
88 | struct Object *var_expr; | ||
89 | }; | ||
90 | |||
91 | // OBJ_TYPE_BUILTIN | ||
92 | struct { | ||
93 | Builtin builtin; | ||
94 | StringView builtin_text; | ||
95 | }; | ||
96 | }; | ||
97 | |||
98 | bool visited; | ||
99 | size_t line; | ||
100 | size_t col; | ||
101 | } Object; | ||
102 | |||
103 | typedef struct Parser { | 6 | typedef struct Parser { |
104 | Token *tokens; | 7 | Token *tokens; |
105 | size_t current; | 8 | size_t current; |
106 | } Parser; | 9 | } Parser; |
107 | 10 | ||
108 | typedef Object* Root; | 11 | typedef enum NodeType { |
109 | 12 | NODE_ERR, | |
110 | typedef struct Program { | 13 | // NODE_FUNCALL, |
111 | Root *roots; | 14 | // NODE_U64, |
112 | Environment *env; | 15 | // NODE_U32, |
113 | } Program; | 16 | // NODE_U16, |
114 | 17 | // NODE_U8, | |
115 | // Token scanner. | 18 | // NODE_S64, |
116 | Token next_token(Parser *parser); | 19 | // NODE_S32, |
117 | Token previous_token(Parser *parser); | 20 | // NODE_S16, |
118 | Token rewind_token(Parser *parser); | 21 | // NODE_S8, |
119 | Token peek_token(const Parser *parser); | 22 | NODE_NUMBER, |
120 | bool has_next_token(const Parser *parser); | 23 | NODE_STRING, |
121 | 24 | } NodeType; | |
122 | // Parsing operations. | 25 | |
123 | Object * parse_tree(Parser *parser, Errors *errors); | 26 | typedef struct Node { |
124 | Object * parse_symbol(Token tok); | 27 | NodeType type; |
125 | Object * parse_string(Token tok); | ||
126 | Object * parse_bool(Token tok); | ||
127 | Object * parse_fixnum(Token tok); | ||
128 | Object * parse_list(Parser *parser, Errors *errors); | ||
129 | Object * parse_lambda(Parser *parser, Errors *errors); | ||
130 | Object * parse_if(Parser *parser, Errors *errors); | ||
131 | Object * parse_var(Parser *parser, Errors *errors); | ||
132 | Program parse(Token *tokens, Errors *errors); | ||
133 | 28 | ||
134 | // Object operations. | 29 | union { |
135 | void object_display(Object *obj); | 30 | // Integer numbers. |
136 | bool object_equal(Object *a, Object *b); | 31 | // u64 as_u64; |
137 | 32 | // u32 as_u32; | |
138 | // Manage resources. | 33 | // u16 as_u16; |
139 | Environment * env_alloc(Environment *parent); | 34 | // u8 as_u8; |
140 | Object * object_alloc(Token tok, ObjectType type); | 35 | // s64 as_s64; |
141 | void object_free(Object *node); | 36 | // s32 as_s32; |
142 | void free_objects(void); | 37 | // s16 as_s16; |
143 | 38 | // s8 as_s8; | |
144 | // | 39 | s64 number; |
145 | // Helper macros. | 40 | |
146 | // | 41 | // String. |
147 | 42 | StringView string; | |
148 | // Type checking. | 43 | // struct { |
149 | #define IS_NIL(VAL) ((VAL)->type == OBJ_TYPE_NIL) | 44 | // u8 *str; |
150 | #define IS_TRUE(VAL) ((VAL)->type != OBJ_TYPE_FALSE) | 45 | // u64 n; |
151 | #define IS_FALSE(VAL) ((VAL)->type == OBJ_TYPE_FALSE) | 46 | // } as_str; |
152 | #define IS_BOOL(VAL) \ | 47 | }; |
153 | (((VAL)->type == OBJ_TYPE_FALSE) || ((VAL)->type == OBJ_TYPE_TRUE)) | 48 | } Node; |
154 | #define IS_FIXNUM(VAL) ((VAL)->type == OBJ_TYPE_FIXNUM) | ||
155 | #define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING) | ||
156 | #define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL) | ||
157 | #define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR) | ||
158 | #define IS_LAMBDA(VAL) ((VAL)->type == OBJ_TYPE_LAMBDA) | ||
159 | #define IS_BUILTIN(VAL) ((VAL)->type == OBJ_TYPE_BUILTIN) | ||
160 | 49 | ||
161 | // Debug. | 50 | void parse(Token *tokens); |
162 | #define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n"); | ||
163 | 51 | ||
164 | #endif // BDL_PARSER_H | 52 | #endif // BDL_PARSER_H |