aboutsummaryrefslogtreecommitdiffstats
path: root/src/parser.c
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-10-29 19:11:40 +0200
committerBad Diode <bd@badd10de.dev>2021-10-29 19:11:40 +0200
commit5ed73b695e6b463149ab0c9ae3eccb26a4ec5807 (patch)
tree01aa089934d1b49fe515fe86fffca01c471c69e9 /src/parser.c
parente73a4c16a2269cdb2f5e7d66fb9839e4c44e14de (diff)
downloadbdl-5ed73b695e6b463149ab0c9ae3eccb26a4ec5807.tar.gz
bdl-5ed73b695e6b463149ab0c9ae3eccb26a4ec5807.zip
Add parser for tokens->ast conversion
Diffstat (limited to 'src/parser.c')
-rw-r--r--src/parser.c257
1 files changed, 257 insertions, 0 deletions
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..2d70c9d
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,257 @@
1#include "parser.h"
2#include "darray.h"
3
4Token
5peek_token(const Parser *parser) {
6 return parser->tokens[parser->current];
7}
8
9Token
10next_token(Parser *parser) {
11 return parser->tokens[parser->current++];
12}
13
14Token
15previous_token(Parser *parser) {
16 return parser->tokens[parser->current - 1];
17}
18
19Token
20rewind_token(Parser *parser) {
21 return parser->tokens[--parser->current];
22}
23
24bool
25has_next_token(const Parser *parser) {
26 return parser->current < array_size(parser->tokens)
27 && peek_token(parser).type != TOKEN_EOF;
28}
29
30Object *
31parse_fixnum(Token tok) {
32 ssize_t num = 0;
33 ssize_t sign = 1;
34 for (size_t i = 0; i < tok.value.n; i++) {
35 char c = tok.value.start[i];
36 if (c == '-') {
37 sign = -1;
38 continue;
39 }
40 num = num * 10 + (c - '0');
41 }
42 Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM);
43 ret->fixnum = num * sign;
44 return ret;
45}
46
47Object *
48parse_bool(Token tok) {
49 ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE;
50 Object *ret = object_alloc(tok, type);
51 return ret;
52}
53
54Object *
55parse_string(Token tok) {
56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING);
57 array_init(ret->text, tok.value.n);
58 array_insert(ret->text, tok.value.start, tok.value.n);
59 return ret;
60}
61
62Object *
63parse_symbol(Token tok) {
64 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL);
65 array_init(ret->text, tok.value.n);
66 array_insert(ret->text, tok.value.start, tok.value.n);
67 return ret;
68}
69
70Object *
71parse_list(Parser *parser, Errors *errors) {
72 if (errors->n != 0) {
73 return NULL;
74 }
75 Token start = previous_token(parser);
76 Object *root = object_alloc(start, OBJ_TYPE_PAIR);
77 root->car = NULL;
78 root->cdr = NULL;
79 Object *current = root;
80 while (has_next_token(parser)) {
81 current->car = parse_tree(parser, errors);
82 if (errors->n != 0 || current->car == NULL) {
83 object_free(root);
84 return NULL;
85 }
86
87 Token tok = peek_token(parser);
88 if (tok.type == TOKEN_RPAREN) {
89 next_token(parser);
90 return root;
91 }
92
93 Object *next = object_alloc(start, OBJ_TYPE_PAIR);
94 next->car = NULL;
95 next->cdr = NULL;
96 current->cdr = next;
97 current = current->cdr;
98 }
99 object_free(root);
100 error_push(errors, (Error){
101 .type = ERR_TYPE_PARSER,
102 .value = ERR_UNBALANCED_PAREN,
103 .line = start.line,
104 .col = start.col,
105 });
106 return NULL;
107}
108
109Object *
110parse_tree(Parser *parser, Errors *errors) {
111 Token tok = next_token(parser);
112 if (errors->n != 0) {
113 return NULL;
114 }
115 switch (tok.type) {
116 case TOKEN_FIXNUM: {
117 return parse_fixnum(tok);
118 } break;
119 case TOKEN_TRUE:
120 case TOKEN_FALSE: {
121 return parse_bool(tok);
122 } break;
123 case TOKEN_RPAREN: {
124 error_push(errors, (Error){
125 .type = ERR_TYPE_PARSER,
126 .value = ERR_UNBALANCED_PAREN,
127 .line = tok.line,
128 .col = tok.col,
129 });
130 return NULL;
131 } break;
132 case TOKEN_LPAREN: {
133 return parse_list(parser, errors);
134 } break;
135 case TOKEN_STRING: {
136 return parse_string(tok);
137 } break;
138 case TOKEN_SYMBOL: {
139 return parse_symbol(tok);
140 } break;
141 case TOKEN_NIL: {
142 return object_alloc(tok, OBJ_TYPE_NIL);
143 } break;
144 default: {
145 break;
146 } break;
147 }
148 error_push(errors, (Error){
149 .type = ERR_TYPE_PARSER,
150 .value = ERR_EOF_REACHED,
151 .line = tok.line,
152 .col = tok.col,
153 });
154 return NULL;
155}
156
157Root *
158parse(Token *tokens, Errors *errors) {
159 Root *roots = NULL;
160 array_init(roots, 0);
161 Parser parser = {
162 .tokens = tokens,
163 .current = 0,
164 };
165 while (has_next_token(&parser)) {
166 Object *root = parse_tree(&parser, errors);
167 OBJ_PRINT(root);
168 if (errors->n != 0) {
169 break;
170 }
171 array_push(roots, root);
172 }
173 return roots;
174}
175
176Object *
177object_alloc(Token tok, ObjectType type) {
178 Object *node = malloc(sizeof(Object));
179 node->line = tok.line;
180 node->col = tok.col;
181 node->type = type;
182 return node;
183}
184
185void
186object_free(Object *node) {
187 if (node == NULL) {
188 return;
189 }
190 if (IS_STRING(node) || IS_SYMBOL(node)) {
191 array_free(node->text);
192 }
193 if (IS_PAIR(node)) {
194 object_free(node->car);
195 object_free(node->cdr);
196 }
197 free(node);
198}
199
200void
201free_roots(Root *roots) {
202 if (roots != NULL) {
203 for (size_t i = 0; i < array_size(roots); i++) {
204 object_free(roots[i]);
205 }
206 array_free(roots);
207 }
208}
209
210void
211display_pair(Object *obj) {
212 object_display(obj->car);
213 if (obj->cdr == NULL) {
214 return;
215 }
216 if (IS_PAIR(obj->cdr)) {
217 printf(" ");
218 display_pair(obj->cdr);
219 } else {
220 printf(" . ");
221 object_display(obj->cdr);
222 }
223}
224
225void
226object_display(Object *obj) {
227 if (obj == NULL) {
228 printf("#{error}");
229 return;
230 }
231 switch (obj->type) {
232 case OBJ_TYPE_FIXNUM: {
233 printf("%zd", obj->fixnum);
234 } break;
235 case OBJ_TYPE_TRUE: {
236 printf("true");
237 } break;
238 case OBJ_TYPE_FALSE: {
239 printf("false");
240 } break;
241 case OBJ_TYPE_NIL: {
242 printf("()");
243 } break;
244 case OBJ_TYPE_STRING: {
245 printf("\"%.*s\"", (int)array_size(obj->text), obj->text);
246 } break;
247 case OBJ_TYPE_SYMBOL: {
248 printf(":%.*s", (int)array_size(obj->text), obj->text);
249 } break;
250 case OBJ_TYPE_PAIR: {
251 printf("(");
252 display_pair(obj);
253 printf(")");
254 } break;
255 }
256 return;
257}