diff options
-rwxr-xr-x | src/lexer.c | 2 | ||||
-rwxr-xr-x | src/lexer.h | 3 | ||||
-rwxr-xr-x | src/main.c | 4 | ||||
-rw-r--r-- | src/parser.c | 130 | ||||
-rwxr-xr-x | src/parser.h | 15 |
5 files changed, 125 insertions, 29 deletions
diff --git a/src/lexer.c b/src/lexer.c index ac93a5c..f272901 100755 --- a/src/lexer.c +++ b/src/lexer.c | |||
@@ -11,6 +11,7 @@ static const char* token_str[] = { | |||
11 | [TOKEN_NIL] = "TOKEN_NIL", | 11 | [TOKEN_NIL] = "TOKEN_NIL", |
12 | [TOKEN_TRUE] = "TOKEN_TRUE", | 12 | [TOKEN_TRUE] = "TOKEN_TRUE", |
13 | [TOKEN_FALSE] = "TOKEN_FALSE", | 13 | [TOKEN_FALSE] = "TOKEN_FALSE", |
14 | [TOKEN_LAMBDA] = "TOKEN_LAMBDA", | ||
14 | [TOKEN_EOF] = "TOKEN_EOF", | 15 | [TOKEN_EOF] = "TOKEN_EOF", |
15 | }; | 16 | }; |
16 | 17 | ||
@@ -124,6 +125,7 @@ find_primitive_type(const StringView value) { | |||
124 | if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } | 125 | if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } |
125 | if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } | 126 | if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } |
126 | if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } | 127 | if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } |
128 | if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; } | ||
127 | 129 | ||
128 | return TOKEN_SYMBOL; | 130 | return TOKEN_SYMBOL; |
129 | } | 131 | } |
diff --git a/src/lexer.h b/src/lexer.h index 43898bd..4fa4db5 100755 --- a/src/lexer.h +++ b/src/lexer.h | |||
@@ -18,6 +18,9 @@ typedef enum TokenType { | |||
18 | TOKEN_TRUE, | 18 | TOKEN_TRUE, |
19 | TOKEN_FALSE, | 19 | TOKEN_FALSE, |
20 | 20 | ||
21 | // Keywords. | ||
22 | TOKEN_LAMBDA, | ||
23 | |||
21 | // End of file. | 24 | // End of file. |
22 | TOKEN_EOF, | 25 | TOKEN_EOF, |
23 | } TokenType; | 26 | } TokenType; |
@@ -35,7 +35,7 @@ process_source(const StringView *source, const char *file_name) { | |||
35 | Root *roots = parse(tokens, &errors); | 35 | Root *roots = parse(tokens, &errors); |
36 | if (errors.n != 0) { | 36 | if (errors.n != 0) { |
37 | report_errors(&errors, file_name); | 37 | report_errors(&errors, file_name); |
38 | free_roots(roots); | 38 | free_objects(); |
39 | array_free(tokens); | 39 | array_free(tokens); |
40 | exit(EXIT_FAILURE); | 40 | exit(EXIT_FAILURE); |
41 | } | 41 | } |
@@ -46,7 +46,7 @@ process_source(const StringView *source, const char *file_name) { | |||
46 | // TODO: Compilation. | 46 | // TODO: Compilation. |
47 | 47 | ||
48 | // Free resources. | 48 | // Free resources. |
49 | free_roots(roots); | 49 | free_objects(); |
50 | } | 50 | } |
51 | 51 | ||
52 | void | 52 | void |
diff --git a/src/parser.c b/src/parser.c index 2d70c9d..8a6c4ce 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -1,6 +1,9 @@ | |||
1 | #include "parser.h" | 1 | #include "parser.h" |
2 | #include "darray.h" | 2 | #include "darray.h" |
3 | 3 | ||
4 | static Object **objects = NULL; | ||
5 | static Root *roots = NULL; | ||
6 | |||
4 | Token | 7 | Token |
5 | peek_token(const Parser *parser) { | 8 | peek_token(const Parser *parser) { |
6 | return parser->tokens[parser->current]; | 9 | return parser->tokens[parser->current]; |
@@ -68,19 +71,81 @@ parse_symbol(Token tok) { | |||
68 | } | 71 | } |
69 | 72 | ||
70 | Object * | 73 | Object * |
74 | parse_lambda(Parser *parser, Errors *errors) { | ||
75 | Token start = next_token(parser); | ||
76 | Object *lambda = object_alloc(start, OBJ_TYPE_LAMBDA); | ||
77 | array_init(lambda->params, 0); | ||
78 | array_init(lambda->body, 0); | ||
79 | |||
80 | // Parse parameters. | ||
81 | Token tok = next_token(parser); | ||
82 | if (tok.type == TOKEN_LPAREN) { | ||
83 | while (has_next_token(parser)) { | ||
84 | Token tok = next_token(parser); | ||
85 | if (tok.type == TOKEN_RPAREN) { | ||
86 | break; | ||
87 | } | ||
88 | if (tok.type != TOKEN_SYMBOL) { | ||
89 | error_push(errors, (Error){ | ||
90 | .type = ERR_TYPE_PARSER, | ||
91 | .value = ERR_WRONG_ARG_TYPE, | ||
92 | .line = tok.line, | ||
93 | .col = tok.col, | ||
94 | }); | ||
95 | } | ||
96 | Object *symbol = parse_symbol(tok); | ||
97 | array_push(lambda->params, symbol); | ||
98 | } | ||
99 | } else if (tok.type != TOKEN_NIL) { | ||
100 | error_push(errors, (Error){ | ||
101 | .type = ERR_TYPE_PARSER, | ||
102 | .value = ERR_WRONG_ARG_TYPE, | ||
103 | .line = tok.line, | ||
104 | .col = tok.col, | ||
105 | }); | ||
106 | } | ||
107 | |||
108 | // Parse body. | ||
109 | while (has_next_token(parser)) { | ||
110 | Token tok = peek_token(parser); | ||
111 | if (tok.type == TOKEN_RPAREN) { | ||
112 | next_token(parser); | ||
113 | break; | ||
114 | } | ||
115 | Object *expr = parse_tree(parser, errors); | ||
116 | array_push(lambda->body, expr); | ||
117 | } | ||
118 | return lambda; | ||
119 | } | ||
120 | |||
121 | Object * | ||
71 | parse_list(Parser *parser, Errors *errors) { | 122 | parse_list(Parser *parser, Errors *errors) { |
72 | if (errors->n != 0) { | 123 | if (errors->n != 0) { |
73 | return NULL; | 124 | return NULL; |
74 | } | 125 | } |
126 | |||
127 | Token tok = peek_token(parser); | ||
128 | if (tok.type == TOKEN_RPAREN) { | ||
129 | error_push(errors, (Error){ | ||
130 | .type = ERR_TYPE_PARSER, | ||
131 | .value = ERR_UNBALANCED_PAREN, | ||
132 | .line = tok.line, | ||
133 | .col = tok.col, | ||
134 | }); | ||
135 | return NULL; | ||
136 | } | ||
137 | if (tok.type == TOKEN_LAMBDA) { | ||
138 | return parse_lambda(parser, errors); | ||
139 | } | ||
140 | |||
75 | Token start = previous_token(parser); | 141 | Token start = previous_token(parser); |
76 | Object *root = object_alloc(start, OBJ_TYPE_PAIR); | 142 | Object *root = object_alloc(start, OBJ_TYPE_PAIR); |
77 | root->car = NULL; | 143 | root->head = NULL; |
78 | root->cdr = NULL; | 144 | root->tail = NULL; |
79 | Object *current = root; | 145 | Object *current = root; |
80 | while (has_next_token(parser)) { | 146 | while (has_next_token(parser)) { |
81 | current->car = parse_tree(parser, errors); | 147 | current->head = parse_tree(parser, errors); |
82 | if (errors->n != 0 || current->car == NULL) { | 148 | if (errors->n != 0 || current->head == NULL) { |
83 | object_free(root); | ||
84 | return NULL; | 149 | return NULL; |
85 | } | 150 | } |
86 | 151 | ||
@@ -91,12 +156,11 @@ parse_list(Parser *parser, Errors *errors) { | |||
91 | } | 156 | } |
92 | 157 | ||
93 | Object *next = object_alloc(start, OBJ_TYPE_PAIR); | 158 | Object *next = object_alloc(start, OBJ_TYPE_PAIR); |
94 | next->car = NULL; | 159 | next->head = NULL; |
95 | next->cdr = NULL; | 160 | next->tail = NULL; |
96 | current->cdr = next; | 161 | current->tail = next; |
97 | current = current->cdr; | 162 | current = current->tail; |
98 | } | 163 | } |
99 | object_free(root); | ||
100 | error_push(errors, (Error){ | 164 | error_push(errors, (Error){ |
101 | .type = ERR_TYPE_PARSER, | 165 | .type = ERR_TYPE_PARSER, |
102 | .value = ERR_UNBALANCED_PAREN, | 166 | .value = ERR_UNBALANCED_PAREN, |
@@ -156,7 +220,7 @@ parse_tree(Parser *parser, Errors *errors) { | |||
156 | 220 | ||
157 | Root * | 221 | Root * |
158 | parse(Token *tokens, Errors *errors) { | 222 | parse(Token *tokens, Errors *errors) { |
159 | Root *roots = NULL; | 223 | // Build initial AST. |
160 | array_init(roots, 0); | 224 | array_init(roots, 0); |
161 | Parser parser = { | 225 | Parser parser = { |
162 | .tokens = tokens, | 226 | .tokens = tokens, |
@@ -170,15 +234,24 @@ parse(Token *tokens, Errors *errors) { | |||
170 | } | 234 | } |
171 | array_push(roots, root); | 235 | array_push(roots, root); |
172 | } | 236 | } |
237 | |||
238 | // Perform semantic analysis. | ||
239 | // 1. Ensure core grammar is correct. | ||
240 | // 2. Check that symbols are defined before usage. | ||
241 | // 3. Remove unnecessary statements. | ||
173 | return roots; | 242 | return roots; |
174 | } | 243 | } |
175 | 244 | ||
176 | Object * | 245 | Object * |
177 | object_alloc(Token tok, ObjectType type) { | 246 | object_alloc(Token tok, ObjectType type) { |
247 | if (objects == NULL) { | ||
248 | array_init(objects, 0); | ||
249 | } | ||
178 | Object *node = malloc(sizeof(Object)); | 250 | Object *node = malloc(sizeof(Object)); |
179 | node->line = tok.line; | 251 | node->line = tok.line; |
180 | node->col = tok.col; | 252 | node->col = tok.col; |
181 | node->type = type; | 253 | node->type = type; |
254 | array_push(objects, node); | ||
182 | return node; | 255 | return node; |
183 | } | 256 | } |
184 | 257 | ||
@@ -190,35 +263,36 @@ object_free(Object *node) { | |||
190 | if (IS_STRING(node) || IS_SYMBOL(node)) { | 263 | if (IS_STRING(node) || IS_SYMBOL(node)) { |
191 | array_free(node->text); | 264 | array_free(node->text); |
192 | } | 265 | } |
193 | if (IS_PAIR(node)) { | 266 | if (IS_LAMBDA(node)) { |
194 | object_free(node->car); | 267 | array_free(node->params); |
195 | object_free(node->cdr); | 268 | array_free(node->body); |
196 | } | 269 | } |
197 | free(node); | 270 | free(node); |
198 | } | 271 | } |
199 | 272 | ||
200 | void | 273 | void |
201 | free_roots(Root *roots) { | 274 | free_objects(void) { |
202 | if (roots != NULL) { | 275 | if (objects != NULL) { |
203 | for (size_t i = 0; i < array_size(roots); i++) { | 276 | for (size_t i = 0; i < array_size(objects); i++) { |
204 | object_free(roots[i]); | 277 | object_free(objects[i]); |
205 | } | 278 | } |
206 | array_free(roots); | 279 | array_free(objects); |
207 | } | 280 | } |
281 | array_free(roots); | ||
208 | } | 282 | } |
209 | 283 | ||
210 | void | 284 | void |
211 | display_pair(Object *obj) { | 285 | display_pair(Object *obj) { |
212 | object_display(obj->car); | 286 | object_display(obj->head); |
213 | if (obj->cdr == NULL) { | 287 | if (obj->tail == NULL) { |
214 | return; | 288 | return; |
215 | } | 289 | } |
216 | if (IS_PAIR(obj->cdr)) { | 290 | if (IS_PAIR(obj->tail)) { |
217 | printf(" "); | 291 | printf(" "); |
218 | display_pair(obj->cdr); | 292 | display_pair(obj->tail); |
219 | } else { | 293 | } else { |
220 | printf(" . "); | 294 | printf(" . "); |
221 | object_display(obj->cdr); | 295 | object_display(obj->tail); |
222 | } | 296 | } |
223 | } | 297 | } |
224 | 298 | ||
@@ -252,6 +326,14 @@ object_display(Object *obj) { | |||
252 | display_pair(obj); | 326 | display_pair(obj); |
253 | printf(")"); | 327 | printf(")"); |
254 | } break; | 328 | } break; |
329 | case OBJ_TYPE_LAMBDA: { | ||
330 | printf("#{lambda( "); | ||
331 | for (size_t i = 0; i < array_size(obj->params); i++) { | ||
332 | object_display(obj->params[i]); | ||
333 | printf(" "); | ||
334 | } | ||
335 | printf(")}"); | ||
336 | } break; | ||
255 | } | 337 | } |
256 | return; | 338 | return; |
257 | } | 339 | } |
diff --git a/src/parser.h b/src/parser.h index ee5febe..17bd6d6 100755 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -11,6 +11,7 @@ typedef enum ObjectType { | |||
11 | OBJ_TYPE_SYMBOL, | 11 | OBJ_TYPE_SYMBOL, |
12 | OBJ_TYPE_STRING, | 12 | OBJ_TYPE_STRING, |
13 | OBJ_TYPE_PAIR, | 13 | OBJ_TYPE_PAIR, |
14 | OBJ_TYPE_LAMBDA, | ||
14 | } ObjectType; | 15 | } ObjectType; |
15 | 16 | ||
16 | typedef struct Object { | 17 | typedef struct Object { |
@@ -25,9 +26,15 @@ typedef struct Object { | |||
25 | 26 | ||
26 | // OBJ_TYPE_PAIR | 27 | // OBJ_TYPE_PAIR |
27 | struct { | 28 | struct { |
28 | struct Object *car; | 29 | struct Object *head; |
29 | struct Object *cdr; | 30 | struct Object *tail; |
30 | }; | 31 | }; |
32 | |||
33 | // OBJ_TYPE_LAMBDA | ||
34 | struct { | ||
35 | struct Object **params; | ||
36 | struct Object **body; | ||
37 | }; | ||
31 | }; | 38 | }; |
32 | 39 | ||
33 | size_t line; | 40 | size_t line; |
@@ -55,6 +62,7 @@ Object * parse_string(Token tok); | |||
55 | Object * parse_bool(Token tok); | 62 | Object * parse_bool(Token tok); |
56 | Object * parse_fixnum(Token tok); | 63 | Object * parse_fixnum(Token tok); |
57 | Object * parse_list(Parser *parser, Errors *errors); | 64 | Object * parse_list(Parser *parser, Errors *errors); |
65 | Object * parse_lambda(Parser *parser, Errors *errors); | ||
58 | Root * parse(Token *tokens, Errors *errors); | 66 | Root * parse(Token *tokens, Errors *errors); |
59 | 67 | ||
60 | // Object operations. | 68 | // Object operations. |
@@ -63,7 +71,7 @@ void object_display(Object *obj); | |||
63 | // Manage resources. | 71 | // Manage resources. |
64 | Object * object_alloc(Token tok, ObjectType type); | 72 | Object * object_alloc(Token tok, ObjectType type); |
65 | void object_free(Object *node); | 73 | void object_free(Object *node); |
66 | void free_roots(Root *roots); | 74 | void free_objects(void); |
67 | 75 | ||
68 | // | 76 | // |
69 | // Helper macros. | 77 | // Helper macros. |
@@ -77,6 +85,7 @@ void free_roots(Root *roots); | |||
77 | #define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING) | 85 | #define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING) |
78 | #define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL) | 86 | #define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL) |
79 | #define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR) | 87 | #define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR) |
88 | #define IS_LAMBDA(VAL) ((VAL)->type == OBJ_TYPE_LAMBDA) | ||
80 | 89 | ||
81 | // Debug. | 90 | // Debug. |
82 | #define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n"); | 91 | #define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n"); |