diff options
Diffstat (limited to 'src/parser.c')
-rw-r--r-- | src/parser.c | 257 |
1 files changed, 257 insertions, 0 deletions
diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..2d70c9d --- /dev/null +++ b/src/parser.c | |||
@@ -0,0 +1,257 @@ | |||
1 | #include "parser.h" | ||
2 | #include "darray.h" | ||
3 | |||
4 | Token | ||
5 | peek_token(const Parser *parser) { | ||
6 | return parser->tokens[parser->current]; | ||
7 | } | ||
8 | |||
9 | Token | ||
10 | next_token(Parser *parser) { | ||
11 | return parser->tokens[parser->current++]; | ||
12 | } | ||
13 | |||
14 | Token | ||
15 | previous_token(Parser *parser) { | ||
16 | return parser->tokens[parser->current - 1]; | ||
17 | } | ||
18 | |||
19 | Token | ||
20 | rewind_token(Parser *parser) { | ||
21 | return parser->tokens[--parser->current]; | ||
22 | } | ||
23 | |||
24 | bool | ||
25 | has_next_token(const Parser *parser) { | ||
26 | return parser->current < array_size(parser->tokens) | ||
27 | && peek_token(parser).type != TOKEN_EOF; | ||
28 | } | ||
29 | |||
30 | Object * | ||
31 | parse_fixnum(Token tok) { | ||
32 | ssize_t num = 0; | ||
33 | ssize_t sign = 1; | ||
34 | for (size_t i = 0; i < tok.value.n; i++) { | ||
35 | char c = tok.value.start[i]; | ||
36 | if (c == '-') { | ||
37 | sign = -1; | ||
38 | continue; | ||
39 | } | ||
40 | num = num * 10 + (c - '0'); | ||
41 | } | ||
42 | Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM); | ||
43 | ret->fixnum = num * sign; | ||
44 | return ret; | ||
45 | } | ||
46 | |||
47 | Object * | ||
48 | parse_bool(Token tok) { | ||
49 | ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE; | ||
50 | Object *ret = object_alloc(tok, type); | ||
51 | return ret; | ||
52 | } | ||
53 | |||
54 | Object * | ||
55 | parse_string(Token tok) { | ||
56 | Object *ret = object_alloc(tok, OBJ_TYPE_STRING); | ||
57 | array_init(ret->text, tok.value.n); | ||
58 | array_insert(ret->text, tok.value.start, tok.value.n); | ||
59 | return ret; | ||
60 | } | ||
61 | |||
62 | Object * | ||
63 | parse_symbol(Token tok) { | ||
64 | Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL); | ||
65 | array_init(ret->text, tok.value.n); | ||
66 | array_insert(ret->text, tok.value.start, tok.value.n); | ||
67 | return ret; | ||
68 | } | ||
69 | |||
70 | Object * | ||
71 | parse_list(Parser *parser, Errors *errors) { | ||
72 | if (errors->n != 0) { | ||
73 | return NULL; | ||
74 | } | ||
75 | Token start = previous_token(parser); | ||
76 | Object *root = object_alloc(start, OBJ_TYPE_PAIR); | ||
77 | root->car = NULL; | ||
78 | root->cdr = NULL; | ||
79 | Object *current = root; | ||
80 | while (has_next_token(parser)) { | ||
81 | current->car = parse_tree(parser, errors); | ||
82 | if (errors->n != 0 || current->car == NULL) { | ||
83 | object_free(root); | ||
84 | return NULL; | ||
85 | } | ||
86 | |||
87 | Token tok = peek_token(parser); | ||
88 | if (tok.type == TOKEN_RPAREN) { | ||
89 | next_token(parser); | ||
90 | return root; | ||
91 | } | ||
92 | |||
93 | Object *next = object_alloc(start, OBJ_TYPE_PAIR); | ||
94 | next->car = NULL; | ||
95 | next->cdr = NULL; | ||
96 | current->cdr = next; | ||
97 | current = current->cdr; | ||
98 | } | ||
99 | object_free(root); | ||
100 | error_push(errors, (Error){ | ||
101 | .type = ERR_TYPE_PARSER, | ||
102 | .value = ERR_UNBALANCED_PAREN, | ||
103 | .line = start.line, | ||
104 | .col = start.col, | ||
105 | }); | ||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | Object * | ||
110 | parse_tree(Parser *parser, Errors *errors) { | ||
111 | Token tok = next_token(parser); | ||
112 | if (errors->n != 0) { | ||
113 | return NULL; | ||
114 | } | ||
115 | switch (tok.type) { | ||
116 | case TOKEN_FIXNUM: { | ||
117 | return parse_fixnum(tok); | ||
118 | } break; | ||
119 | case TOKEN_TRUE: | ||
120 | case TOKEN_FALSE: { | ||
121 | return parse_bool(tok); | ||
122 | } break; | ||
123 | case TOKEN_RPAREN: { | ||
124 | error_push(errors, (Error){ | ||
125 | .type = ERR_TYPE_PARSER, | ||
126 | .value = ERR_UNBALANCED_PAREN, | ||
127 | .line = tok.line, | ||
128 | .col = tok.col, | ||
129 | }); | ||
130 | return NULL; | ||
131 | } break; | ||
132 | case TOKEN_LPAREN: { | ||
133 | return parse_list(parser, errors); | ||
134 | } break; | ||
135 | case TOKEN_STRING: { | ||
136 | return parse_string(tok); | ||
137 | } break; | ||
138 | case TOKEN_SYMBOL: { | ||
139 | return parse_symbol(tok); | ||
140 | } break; | ||
141 | case TOKEN_NIL: { | ||
142 | return object_alloc(tok, OBJ_TYPE_NIL); | ||
143 | } break; | ||
144 | default: { | ||
145 | break; | ||
146 | } break; | ||
147 | } | ||
148 | error_push(errors, (Error){ | ||
149 | .type = ERR_TYPE_PARSER, | ||
150 | .value = ERR_EOF_REACHED, | ||
151 | .line = tok.line, | ||
152 | .col = tok.col, | ||
153 | }); | ||
154 | return NULL; | ||
155 | } | ||
156 | |||
157 | Root * | ||
158 | parse(Token *tokens, Errors *errors) { | ||
159 | Root *roots = NULL; | ||
160 | array_init(roots, 0); | ||
161 | Parser parser = { | ||
162 | .tokens = tokens, | ||
163 | .current = 0, | ||
164 | }; | ||
165 | while (has_next_token(&parser)) { | ||
166 | Object *root = parse_tree(&parser, errors); | ||
167 | OBJ_PRINT(root); | ||
168 | if (errors->n != 0) { | ||
169 | break; | ||
170 | } | ||
171 | array_push(roots, root); | ||
172 | } | ||
173 | return roots; | ||
174 | } | ||
175 | |||
176 | Object * | ||
177 | object_alloc(Token tok, ObjectType type) { | ||
178 | Object *node = malloc(sizeof(Object)); | ||
179 | node->line = tok.line; | ||
180 | node->col = tok.col; | ||
181 | node->type = type; | ||
182 | return node; | ||
183 | } | ||
184 | |||
185 | void | ||
186 | object_free(Object *node) { | ||
187 | if (node == NULL) { | ||
188 | return; | ||
189 | } | ||
190 | if (IS_STRING(node) || IS_SYMBOL(node)) { | ||
191 | array_free(node->text); | ||
192 | } | ||
193 | if (IS_PAIR(node)) { | ||
194 | object_free(node->car); | ||
195 | object_free(node->cdr); | ||
196 | } | ||
197 | free(node); | ||
198 | } | ||
199 | |||
200 | void | ||
201 | free_roots(Root *roots) { | ||
202 | if (roots != NULL) { | ||
203 | for (size_t i = 0; i < array_size(roots); i++) { | ||
204 | object_free(roots[i]); | ||
205 | } | ||
206 | array_free(roots); | ||
207 | } | ||
208 | } | ||
209 | |||
210 | void | ||
211 | display_pair(Object *obj) { | ||
212 | object_display(obj->car); | ||
213 | if (obj->cdr == NULL) { | ||
214 | return; | ||
215 | } | ||
216 | if (IS_PAIR(obj->cdr)) { | ||
217 | printf(" "); | ||
218 | display_pair(obj->cdr); | ||
219 | } else { | ||
220 | printf(" . "); | ||
221 | object_display(obj->cdr); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | void | ||
226 | object_display(Object *obj) { | ||
227 | if (obj == NULL) { | ||
228 | printf("#{error}"); | ||
229 | return; | ||
230 | } | ||
231 | switch (obj->type) { | ||
232 | case OBJ_TYPE_FIXNUM: { | ||
233 | printf("%zd", obj->fixnum); | ||
234 | } break; | ||
235 | case OBJ_TYPE_TRUE: { | ||
236 | printf("true"); | ||
237 | } break; | ||
238 | case OBJ_TYPE_FALSE: { | ||
239 | printf("false"); | ||
240 | } break; | ||
241 | case OBJ_TYPE_NIL: { | ||
242 | printf("()"); | ||
243 | } break; | ||
244 | case OBJ_TYPE_STRING: { | ||
245 | printf("\"%.*s\"", (int)array_size(obj->text), obj->text); | ||
246 | } break; | ||
247 | case OBJ_TYPE_SYMBOL: { | ||
248 | printf(":%.*s", (int)array_size(obj->text), obj->text); | ||
249 | } break; | ||
250 | case OBJ_TYPE_PAIR: { | ||
251 | printf("("); | ||
252 | display_pair(obj); | ||
253 | printf(")"); | ||
254 | } break; | ||
255 | } | ||
256 | return; | ||
257 | } | ||