diff options
author | Bad Diode <bd@badd10de.dev> | 2022-04-03 18:54:51 -0300 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2022-04-03 18:54:51 -0300 |
commit | c8b53cb4590d8d6cbfc5cbf891809ddd99e33fe5 (patch) | |
tree | 716f3676abb4a777add1683390cfc37a605fe911 | |
parent | 496ec36c8002a85dc0c3bb62de6d176e369b40af (diff) | |
download | bdl-c8b53cb4590d8d6cbfc5cbf891809ddd99e33fe5.tar.gz bdl-c8b53cb4590d8d6cbfc5cbf891809ddd99e33fe5.zip |
Add parsing for function definitions
This commit also changes the structure of some existing functions.
Namely, parse_* functions other than parse_next now check that the type of
the token to parse is correct. This allows us to use them directly in the
rest of the code to consume tokens and properly produce an error if the
token type is not the expected one.
In the same fashion, two new functions, consume_lparen and consume_rparen,
are implemented. They only return true/false, and push an error if the
token is not the expected parenthesis.
-rw-r--r-- | src/errors.c | 7 | ||||
-rw-r--r-- | src/errors.h | 7 | ||||
-rw-r--r-- | src/nodes.c | 28 | ||||
-rw-r--r-- | src/nodes.h | 14 | ||||
-rw-r--r-- | src/parser.c | 151 |
5 files changed, 180 insertions, 27 deletions
diff --git a/src/errors.c b/src/errors.c index e69e4f9..b93b462 100644 --- a/src/errors.c +++ b/src/errors.c | |||
@@ -8,6 +8,13 @@ static const char* error_msgs[] = { | |||
8 | [ERR_MALFORMED_NUMBER] = "error: malformed number token", | 8 | [ERR_MALFORMED_NUMBER] = "error: malformed number token", |
9 | [ERR_MALFORMED_EXPR] = "error: malformed expression", | 9 | [ERR_MALFORMED_EXPR] = "error: malformed expression", |
10 | [ERR_UNIMPLEMENTED] = "error: not implemented", | 10 | [ERR_UNIMPLEMENTED] = "error: not implemented", |
11 | [ERR_NOT_A_NUMBER] = "error: expected a number", | ||
12 | [ERR_NOT_A_SYMBOL] = "error: expected a symbol", | ||
13 | [ERR_NOT_A_STRING] = "error: expected a string", | ||
14 | [ERR_NOT_A_TYPE] = "error: expected a type", | ||
15 | [ERR_NOT_A_BOOL] = "error: expected a bool", | ||
16 | [ERR_NOT_A_LPAREN] = "error: expected opening parentheses (lparen)", | ||
17 | [ERR_NOT_A_RPAREN] = "error: expected closing parentheses (rparen)", | ||
11 | }; | 18 | }; |
12 | 19 | ||
13 | static Error current_error = {.value = ERR_OK}; | 20 | static Error current_error = {.value = ERR_OK}; |
diff --git a/src/errors.h b/src/errors.h index 25f9945..871711b 100644 --- a/src/errors.h +++ b/src/errors.h | |||
@@ -16,6 +16,13 @@ typedef enum ErrorValue { | |||
16 | ERR_MALFORMED_NUMBER, | 16 | ERR_MALFORMED_NUMBER, |
17 | ERR_MALFORMED_EXPR, | 17 | ERR_MALFORMED_EXPR, |
18 | ERR_UNIMPLEMENTED, | 18 | ERR_UNIMPLEMENTED, |
19 | ERR_NOT_A_NUMBER, | ||
20 | ERR_NOT_A_SYMBOL, | ||
21 | ERR_NOT_A_STRING, | ||
22 | ERR_NOT_A_TYPE, | ||
23 | ERR_NOT_A_BOOL, | ||
24 | ERR_NOT_A_LPAREN, | ||
25 | ERR_NOT_A_RPAREN, | ||
19 | ERR_OK, | 26 | ERR_OK, |
20 | } ErrorValue; | 27 | } ErrorValue; |
21 | 28 | ||
diff --git a/src/nodes.c b/src/nodes.c index 855d186..fc39d8e 100644 --- a/src/nodes.c +++ b/src/nodes.c | |||
@@ -23,6 +23,7 @@ print_node(Node *node) { | |||
23 | } | 23 | } |
24 | } break; | 24 | } break; |
25 | case NODE_SYMBOL: | 25 | case NODE_SYMBOL: |
26 | case NODE_TYPE: | ||
26 | case NODE_STRING: { | 27 | case NODE_STRING: { |
27 | sv_write(&node->string); | 28 | sv_write(&node->string); |
28 | } break; | 29 | } break; |
@@ -51,7 +52,7 @@ print_node(Node *node) { | |||
51 | printf("(def "); | 52 | printf("(def "); |
52 | print_node(node->def.symbol); | 53 | print_node(node->def.symbol); |
53 | printf(":"); | 54 | printf(":"); |
54 | sv_write(&node->def.type); | 55 | print_node(node->def.type); |
55 | printf(" "); | 56 | printf(" "); |
56 | print_node(node->def.value); | 57 | print_node(node->def.value); |
57 | printf(")"); | 58 | printf(")"); |
@@ -63,6 +64,31 @@ print_node(Node *node) { | |||
63 | print_node(node->def.value); | 64 | print_node(node->def.value); |
64 | printf(")"); | 65 | printf(")"); |
65 | } break; | 66 | } break; |
67 | case NODE_FUN: { | ||
68 | printf("(fun "); | ||
69 | print_node(node->fun.name); | ||
70 | printf(" ("); | ||
71 | size_t n_params = array_size(node->fun.param_names); | ||
72 | for (size_t i = 0; i < n_params; ++i) { | ||
73 | print_node(node->fun.param_names[i]); | ||
74 | printf(":"); | ||
75 | print_node(node->fun.param_types[i]); | ||
76 | if (i < n_params - 1) { | ||
77 | printf(" "); | ||
78 | } | ||
79 | } | ||
80 | printf(")"); | ||
81 | printf(": "); | ||
82 | print_node(node->fun.return_type); | ||
83 | |||
84 | size_t n_expr = array_size(node->fun.body); | ||
85 | for (size_t i = 0; i < n_expr; ++i) { | ||
86 | printf(" "); | ||
87 | print_node(node->fun.body[i]); | ||
88 | } | ||
89 | |||
90 | printf(")"); | ||
91 | } break; | ||
66 | default: { printf("{#unknown#}"); } break; | 92 | default: { printf("{#unknown#}"); } break; |
67 | } | 93 | } |
68 | } | 94 | } |
diff --git a/src/nodes.h b/src/nodes.h index 566d6d3..2e24ea5 100644 --- a/src/nodes.h +++ b/src/nodes.h | |||
@@ -7,8 +7,10 @@ typedef enum NodeType { | |||
7 | NODE_BOOL, | 7 | NODE_BOOL, |
8 | NODE_STRING, | 8 | NODE_STRING, |
9 | NODE_SYMBOL, | 9 | NODE_SYMBOL, |
10 | NODE_TYPE, | ||
10 | NODE_DEF, | 11 | NODE_DEF, |
11 | NODE_SET, | 12 | NODE_SET, |
13 | NODE_FUN, | ||
12 | } NodeType; | 14 | } NodeType; |
13 | 15 | ||
14 | typedef struct Node { | 16 | typedef struct Node { |
@@ -38,13 +40,23 @@ typedef struct Node { | |||
38 | struct { | 40 | struct { |
39 | struct Node *symbol; | 41 | struct Node *symbol; |
40 | struct Node *value; | 42 | struct Node *value; |
41 | StringView type; | 43 | struct Node *type; |
42 | } def; | 44 | } def; |
43 | 45 | ||
46 | // Variable assignment. | ||
44 | struct { | 47 | struct { |
45 | struct Node *symbol; | 48 | struct Node *symbol; |
46 | struct Node *value; | 49 | struct Node *value; |
47 | } set; | 50 | } set; |
51 | |||
52 | // Function definition. | ||
53 | struct { | ||
54 | struct Node *name; | ||
55 | struct Node **param_names; | ||
56 | struct Node **param_types; | ||
57 | struct Node *return_type; | ||
58 | struct Node **body; | ||
59 | } fun; | ||
48 | }; | 60 | }; |
49 | } Node; | 61 | } Node; |
50 | 62 | ||
diff --git a/src/parser.c b/src/parser.c index b36ddc9..cfbefaf 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -16,9 +16,33 @@ has_next(Parser *parser) { | |||
16 | return parser->current < array_size(parser->tokens); | 16 | return parser->current < array_size(parser->tokens); |
17 | } | 17 | } |
18 | 18 | ||
19 | bool | ||
20 | consume_rparen(Parser *parser) { | ||
21 | Token tok = next_token(parser); | ||
22 | if (tok.type != TOKEN_RPAREN) { | ||
23 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_RPAREN, tok.line, tok.col); | ||
24 | return false; | ||
25 | } | ||
26 | return true; | ||
27 | } | ||
28 | |||
29 | bool | ||
30 | consume_lparen(Parser *parser) { | ||
31 | Token tok = next_token(parser); | ||
32 | if (tok.type != TOKEN_LPAREN) { | ||
33 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_LPAREN, tok.line, tok.col); | ||
34 | return false; | ||
35 | } | ||
36 | return true; | ||
37 | } | ||
38 | |||
19 | Node * | 39 | Node * |
20 | parse_number(Parser *parser) { | 40 | parse_number(Parser *parser) { |
21 | Token tok = next_token(parser); | 41 | Token tok = next_token(parser); |
42 | if (tok.type != TOKEN_NUMBER) { | ||
43 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_NUMBER, tok.line, tok.col); | ||
44 | return NULL; | ||
45 | } | ||
22 | 46 | ||
23 | bool negative = false; | 47 | bool negative = false; |
24 | int base = 10; | 48 | int base = 10; |
@@ -93,6 +117,10 @@ parse_number(Parser *parser) { | |||
93 | Node * | 117 | Node * |
94 | parse_string(Parser *parser) { | 118 | parse_string(Parser *parser) { |
95 | Token tok = next_token(parser); | 119 | Token tok = next_token(parser); |
120 | if (tok.type != TOKEN_STRING) { | ||
121 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_STRING, tok.line, tok.col); | ||
122 | return NULL; | ||
123 | } | ||
96 | Node *node = alloc_node(NODE_STRING); | 124 | Node *node = alloc_node(NODE_STRING); |
97 | node->string = tok.value; | 125 | node->string = tok.value; |
98 | return node; | 126 | return node; |
@@ -101,14 +129,39 @@ parse_string(Parser *parser) { | |||
101 | Node * | 129 | Node * |
102 | parse_symbol(Parser *parser) { | 130 | parse_symbol(Parser *parser) { |
103 | Token tok = next_token(parser); | 131 | Token tok = next_token(parser); |
132 | if (tok.type != TOKEN_SYMBOL) { | ||
133 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_SYMBOL, tok.line, tok.col); | ||
134 | return NULL; | ||
135 | } | ||
104 | Node *node = alloc_node(NODE_SYMBOL); | 136 | Node *node = alloc_node(NODE_SYMBOL); |
105 | node->string = tok.value; | 137 | node->string = tok.value; |
106 | return node; | 138 | return node; |
107 | } | 139 | } |
108 | 140 | ||
109 | Node * | 141 | Node * |
142 | parse_type(Parser *parser) { | ||
143 | Token tok = next_token(parser); | ||
144 | if (tok.type != TOKEN_COLON) { | ||
145 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | ||
146 | return NULL; | ||
147 | } | ||
148 | tok = next_token(parser); | ||
149 | if (tok.type != TOKEN_SYMBOL) { | ||
150 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_TYPE, tok.line, tok.col); | ||
151 | return NULL; | ||
152 | } | ||
153 | Node *node = alloc_node(NODE_TYPE); | ||
154 | node->string = tok.value; | ||
155 | return node; | ||
156 | } | ||
157 | |||
158 | Node * | ||
110 | parse_bool(Parser *parser) { | 159 | parse_bool(Parser *parser) { |
111 | Token tok = next_token(parser); | 160 | Token tok = next_token(parser); |
161 | if (!(tok.type == TOKEN_TRUE || tok.type == TOKEN_FALSE)) { | ||
162 | push_error(ERR_TYPE_PARSER, ERR_NOT_A_BOOL, tok.line, tok.col); | ||
163 | return NULL; | ||
164 | } | ||
112 | Node *node = alloc_node(NODE_BOOL); | 165 | Node *node = alloc_node(NODE_BOOL); |
113 | node->boolean = tok.type == TOKEN_TRUE; | 166 | node->boolean = tok.type == TOKEN_TRUE; |
114 | return node; | 167 | return node; |
@@ -138,63 +191,51 @@ parse_builtin(Parser *parser) { | |||
138 | 191 | ||
139 | Node * | 192 | Node * |
140 | parse_def(Parser *parser) { | 193 | parse_def(Parser *parser) { |
141 | Token op = next_token(parser); | 194 | next_token(parser); // Skip keyword. |
142 | Node *symbol = parse_next(parser); | 195 | |
143 | if (symbol == NULL || symbol->type != NODE_SYMBOL) { | 196 | Node *symbol = parse_symbol(parser); |
144 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | 197 | if (symbol == NULL) { |
145 | return NULL; | 198 | return NULL; |
146 | } | 199 | } |
147 | 200 | ||
148 | // TODO: Making type checking mandatory for now until we introduce | 201 | // TODO: Making type checking mandatory for now until we introduce |
149 | // type inference. | 202 | // type inference. |
150 | Token next = next_token(parser); | 203 | Node *type = parse_type(parser); |
151 | if (next.type != TOKEN_COLON) { | 204 | if (type == NULL) { |
152 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
153 | return NULL; | ||
154 | } | ||
155 | Token type_name = next_token(parser); | ||
156 | if (type_name.type != TOKEN_SYMBOL) { | ||
157 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
158 | return NULL; | 205 | return NULL; |
159 | } | 206 | } |
160 | 207 | ||
161 | Node *value = parse_next(parser); | 208 | Node *value = parse_next(parser); |
162 | if (value == NULL) { | 209 | if (value == NULL) { |
163 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
164 | return NULL; | 210 | return NULL; |
165 | } | 211 | } |
166 | 212 | ||
167 | Token end = next_token(parser); | 213 | if (!consume_rparen(parser)) { |
168 | if (end.type != TOKEN_RPAREN) { | ||
169 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
170 | return NULL; | 214 | return NULL; |
171 | } | 215 | } |
172 | 216 | ||
173 | Node *node = alloc_node(NODE_DEF); | 217 | Node *node = alloc_node(NODE_DEF); |
174 | node->def.symbol = symbol; | 218 | node->def.symbol = symbol; |
175 | node->def.value = value; | 219 | node->def.value = value; |
176 | node->def.type = type_name.value; | 220 | node->def.type = type; |
177 | return node; | 221 | return node; |
178 | } | 222 | } |
179 | 223 | ||
180 | Node * | 224 | Node * |
181 | parse_set(Parser *parser) { | 225 | parse_set(Parser *parser) { |
182 | Token op = next_token(parser); | 226 | next_token(parser); // Skip keyword. |
183 | Node *symbol = parse_next(parser); | 227 | |
184 | if (symbol == NULL || symbol->type != NODE_SYMBOL) { | 228 | Node *symbol = parse_symbol(parser); |
185 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | 229 | if (symbol == NULL) { |
186 | return NULL; | 230 | return NULL; |
187 | } | 231 | } |
188 | 232 | ||
189 | Node *value = parse_next(parser); | 233 | Node *value = parse_next(parser); |
190 | if (value == NULL) { | 234 | if (value == NULL) { |
191 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
192 | return NULL; | 235 | return NULL; |
193 | } | 236 | } |
194 | 237 | ||
195 | Token end = next_token(parser); | 238 | if (!consume_rparen(parser)) { |
196 | if (end.type != TOKEN_RPAREN) { | ||
197 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_EXPR, op.line, op.col); | ||
198 | return NULL; | 239 | return NULL; |
199 | } | 240 | } |
200 | 241 | ||
@@ -205,6 +246,65 @@ parse_set(Parser *parser) { | |||
205 | } | 246 | } |
206 | 247 | ||
207 | Node * | 248 | Node * |
249 | parse_fun(Parser *parser) { | ||
250 | next_token(parser); // Skip keyword. | ||
251 | |||
252 | Node *name = parse_symbol(parser); | ||
253 | if (name == NULL) { | ||
254 | return NULL; | ||
255 | } | ||
256 | |||
257 | Node *node = alloc_node(NODE_FUN); | ||
258 | node->fun.name = name; | ||
259 | array_init(node->fun.param_names, 0); | ||
260 | array_init(node->fun.param_types, 0); | ||
261 | array_init(node->fun.body, 0); | ||
262 | |||
263 | // Parse parameter list and return type. | ||
264 | if (!consume_lparen(parser)) { | ||
265 | return NULL; | ||
266 | } | ||
267 | while (true) { | ||
268 | Token next = peek_token(parser); | ||
269 | if (next.type == TOKEN_RPAREN) { | ||
270 | next_token(parser); | ||
271 | break; | ||
272 | } | ||
273 | Node *name = parse_symbol(parser); | ||
274 | if (name == NULL) { | ||
275 | return NULL; | ||
276 | } | ||
277 | Node *type = parse_type(parser); | ||
278 | if (type == NULL) { | ||
279 | return NULL; | ||
280 | } | ||
281 | array_push(node->fun.param_names, name); | ||
282 | array_push(node->fun.param_types, type); | ||
283 | } | ||
284 | Node *ret_type = parse_type(parser); | ||
285 | if (ret_type == NULL) { | ||
286 | return NULL; | ||
287 | } | ||
288 | node->fun.return_type = ret_type; | ||
289 | |||
290 | // Parse body. | ||
291 | while (true) { | ||
292 | Token next = peek_token(parser); | ||
293 | if (next.type == TOKEN_RPAREN) { | ||
294 | next_token(parser); | ||
295 | break; | ||
296 | } | ||
297 | Node *expr = parse_next(parser); | ||
298 | if (expr == NULL) { | ||
299 | return NULL; | ||
300 | } | ||
301 | array_push(node->fun.body, expr); | ||
302 | } | ||
303 | |||
304 | return node; | ||
305 | } | ||
306 | |||
307 | Node * | ||
208 | parse_paren(Parser *parser) { | 308 | parse_paren(Parser *parser) { |
209 | next_token(parser); // Skip paren. | 309 | next_token(parser); // Skip paren. |
210 | Token tok = peek_token(parser); | 310 | Token tok = peek_token(parser); |
@@ -222,6 +322,7 @@ parse_paren(Parser *parser) { | |||
222 | // Special functions. | 322 | // Special functions. |
223 | case TOKEN_DEF: { return parse_def(parser); } break; | 323 | case TOKEN_DEF: { return parse_def(parser); } break; |
224 | case TOKEN_SET: { return parse_set(parser); } break; | 324 | case TOKEN_SET: { return parse_set(parser); } break; |
325 | case TOKEN_FUN: { return parse_fun(parser); } break; | ||
225 | default: break; | 326 | default: break; |
226 | } | 327 | } |
227 | 328 | ||