diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/badlib.h | 3 | ||||
-rw-r--r-- | src/lexer.c | 16 | ||||
-rw-r--r-- | src/main.c | 249 |
3 files changed, 231 insertions, 37 deletions
diff --git a/src/badlib.h b/src/badlib.h index 71f4bee..4944d2e 100644 --- a/src/badlib.h +++ b/src/badlib.h | |||
@@ -832,6 +832,9 @@ _array_maybe_grow(void *arr, sz type_size, Arena *a) { | |||
832 | 832 | ||
833 | static inline char * | 833 | static inline char * |
834 | _array_insert(char *arr, const char *src, sz n_bytes, sz type_size, Arena *a) { | 834 | _array_insert(char *arr, const char *src, sz n_bytes, sz type_size, Arena *a) { |
835 | if (!arr) { | ||
836 | arr = _array_reserve(0, 0, a); | ||
837 | } | ||
835 | ArrayHeader *head = array_head(arr); | 838 | ArrayHeader *head = array_head(arr); |
836 | sz new_size = n_bytes + head->size; | 839 | sz new_size = n_bytes + head->size; |
837 | if (new_size > head->cap * type_size) { | 840 | if (new_size > head->cap * type_size) { |
diff --git a/src/lexer.c b/src/lexer.c index df998f2..6a5621c 100644 --- a/src/lexer.c +++ b/src/lexer.c | |||
@@ -1,6 +1,8 @@ | |||
1 | #include "badlib.h" | ||
2 | |||
1 | #define LEXER_MEM GB(2) | 3 | #define LEXER_MEM GB(2) |
2 | 4 | ||
3 | typedef enum TokenType { | 5 | typedef enum TokenKind { |
4 | TOK_UNKNOWN = 0, | 6 | TOK_UNKNOWN = 0, |
5 | 7 | ||
6 | // Parentheses. | 8 | // Parentheses. |
@@ -65,7 +67,7 @@ typedef enum TokenType { | |||
65 | 67 | ||
66 | // End of file. | 68 | // End of file. |
67 | TOK_EOF, | 69 | TOK_EOF, |
68 | } TokenType; | 70 | } TokenKind; |
69 | 71 | ||
70 | Str token_str[] = { | 72 | Str token_str[] = { |
71 | [TOK_UNKNOWN] = cstr("UNKNOWN"), | 73 | [TOK_UNKNOWN] = cstr("UNKNOWN"), |
@@ -135,7 +137,7 @@ Str token_str[] = { | |||
135 | }; | 137 | }; |
136 | 138 | ||
137 | typedef struct Token { | 139 | typedef struct Token { |
138 | TokenType type; | 140 | TokenKind kind; |
139 | Str val; | 141 | Str val; |
140 | sz line; | 142 | sz line; |
141 | sz col; | 143 | sz col; |
@@ -256,7 +258,7 @@ scan_skip_until_valid(Scanner *scanner) { | |||
256 | } | 258 | } |
257 | 259 | ||
258 | Token | 260 | Token |
259 | emit_token(Scanner current, Scanner *scanner, TokenType t) { | 261 | emit_token(Scanner current, Scanner *scanner, TokenKind t) { |
260 | Str val = current.str; | 262 | Str val = current.str; |
261 | val.size = current.str.size - scanner->str.size; | 263 | val.size = current.str.size - scanner->str.size; |
262 | val.size = val.size < 0 ? 0 : val.size; | 264 | val.size = val.size < 0 ? 0 : val.size; |
@@ -264,7 +266,7 @@ emit_token(Scanner current, Scanner *scanner, TokenType t) { | |||
264 | .val = val, | 266 | .val = val, |
265 | .line = current.line + 1, | 267 | .line = current.line + 1, |
266 | .col = current.col + 1, | 268 | .col = current.col + 1, |
267 | .type = t, | 269 | .kind = t, |
268 | }; | 270 | }; |
269 | } | 271 | } |
270 | 272 | ||
@@ -274,7 +276,7 @@ emit_token_err(Scanner *scanner, Str err_msg) { | |||
274 | .line = scanner->line + 1, | 276 | .line = scanner->line + 1, |
275 | .col = scanner->col + 1, | 277 | .col = scanner->col + 1, |
276 | .val = err_msg, | 278 | .val = err_msg, |
277 | .type = TOK_UNKNOWN, | 279 | .kind = TOK_UNKNOWN, |
278 | }; | 280 | }; |
279 | } | 281 | } |
280 | 282 | ||
@@ -430,7 +432,7 @@ scan_token(Scanner *scanner) { | |||
430 | *scanner = current; | 432 | *scanner = current; |
431 | return emit_token_number(scanner); | 433 | return emit_token_number(scanner); |
432 | } | 434 | } |
433 | return emit_token(current, scanner, TOK_ADD); | 435 | return emit_token(current, scanner, TOK_SUB); |
434 | }; | 436 | }; |
435 | case '*': | 437 | case '*': |
436 | return emit_token(current, scanner, TOK_MUL); | 438 | return emit_token(current, scanner, TOK_MUL); |
@@ -22,7 +22,7 @@ init(void) { | |||
22 | 22 | ||
23 | void | 23 | void |
24 | print_token(Token tok) { | 24 | print_token(Token tok) { |
25 | println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.type], tok.val); | 25 | println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); |
26 | } | 26 | } |
27 | 27 | ||
28 | void | 28 | void |
@@ -34,13 +34,113 @@ print_tokens(Str path, Token *tokens) { | |||
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | // | ||
38 | // Nodes | ||
39 | // | ||
40 | |||
41 | typedef enum NodeKind { | ||
42 | NODE_NUMBER, | ||
43 | // TODO: probably want to handle ints/unsigneds/floats separately. | ||
44 | NODE_ADD, | ||
45 | NODE_SUB, | ||
46 | NODE_DIV, | ||
47 | NODE_MUL, | ||
48 | // TODO: MOD | ||
49 | } NodeKind; | ||
50 | |||
51 | Str node_str[] = { | ||
52 | [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"), | ||
53 | [NODE_SUB] = cstr("SUB"), [NODE_DIV] = cstr("DIV"), | ||
54 | [NODE_MUL] = cstr("MUL"), | ||
55 | }; | ||
56 | |||
57 | typedef struct Node { | ||
58 | sz id; | ||
59 | sz line; | ||
60 | sz col; | ||
61 | |||
62 | NodeKind kind; | ||
63 | union { | ||
64 | f64 d; | ||
65 | f32 f; | ||
66 | sz i; | ||
67 | u64 u; | ||
68 | } value; | ||
69 | struct Node *left; | ||
70 | struct Node *right; | ||
71 | } Node; | ||
72 | |||
73 | Node * | ||
74 | node_alloc(NodeKind kind, Token tok, Arena *a) { | ||
75 | static sz id = 0; | ||
76 | Node *node = arena_calloc((sz)sizeof(Node), a); | ||
77 | node->id = id++; | ||
78 | node->kind = kind; | ||
79 | node->line = tok.line; | ||
80 | node->col = tok.col; | ||
81 | return node; | ||
82 | } | ||
83 | |||
84 | void | ||
85 | print_node(Node *node) { | ||
86 | if (!node) { | ||
87 | return; | ||
88 | } | ||
89 | switch (node->kind) { | ||
90 | case NODE_NUMBER: { | ||
91 | print("%d", node->value.i); | ||
92 | } break; | ||
93 | case NODE_ADD: { | ||
94 | print("(+ "); | ||
95 | print_node(node->left); | ||
96 | print(" "); | ||
97 | print_node(node->right); | ||
98 | print(")"); | ||
99 | } break; | ||
100 | case NODE_SUB: { | ||
101 | print("(- "); | ||
102 | print_node(node->left); | ||
103 | print(" "); | ||
104 | print_node(node->right); | ||
105 | print(")"); | ||
106 | } break; | ||
107 | case NODE_DIV: { | ||
108 | print("(/ "); | ||
109 | print_node(node->left); | ||
110 | print(" "); | ||
111 | print_node(node->right); | ||
112 | print(")"); | ||
113 | } break; | ||
114 | case NODE_MUL: { | ||
115 | print("(* "); | ||
116 | print_node(node->left); | ||
117 | print(" "); | ||
118 | print_node(node->right); | ||
119 | print(")"); | ||
120 | } break; | ||
121 | default: { | ||
122 | eprintln("error: unknown node kind: %d", node->kind); | ||
123 | } break; | ||
124 | } | ||
125 | } | ||
126 | |||
127 | // | ||
128 | // Parsing. | ||
129 | // | ||
130 | |||
37 | typedef struct Parser { | 131 | typedef struct Parser { |
38 | Token current; | 132 | Token current; |
39 | Token previous; | 133 | Token previous; |
40 | Token *tokens; | 134 | Token *tokens; |
41 | sz idx; | 135 | sz idx; |
136 | |||
137 | // Error handling. | ||
42 | bool err; | 138 | bool err; |
43 | bool panic; | 139 | bool panic; |
140 | |||
141 | // Storage. | ||
142 | Node **nodes; | ||
143 | Arena *storage; | ||
44 | } Parser; | 144 | } Parser; |
45 | 145 | ||
46 | typedef enum { | 146 | typedef enum { |
@@ -80,6 +180,7 @@ ParseRule parse_rules[] = { | |||
80 | [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, | 180 | [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, |
81 | [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, | 181 | [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, |
82 | [TOK_NUMBER] = {parse_number, NULL, PREC_NONE}, | 182 | [TOK_NUMBER] = {parse_number, NULL, PREC_NONE}, |
183 | [TOK_EOF] = {NULL, NULL, PREC_NONE}, | ||
83 | }; | 184 | }; |
84 | 185 | ||
85 | void | 186 | void |
@@ -91,9 +192,9 @@ parse_emit_err(Parser *parser, Token token, Str msg) { | |||
91 | parser->err = true; | 192 | parser->err = true; |
92 | eprint("%d:%d: error: %s", token.line, token.col, msg); | 193 | eprint("%d:%d: error: %s", token.line, token.col, msg); |
93 | 194 | ||
94 | if (token.type == TOK_EOF) { | 195 | if (token.kind == TOK_EOF) { |
95 | eprintln(" -> at end of the file"); | 196 | eprintln(" -> at end of the file"); |
96 | } else if (token.type != TOK_UNKNOWN) { | 197 | } else if (token.kind != TOK_UNKNOWN) { |
97 | eprintln(" -> at %s", token.val); | 198 | eprintln(" -> at %s", token.val); |
98 | } | 199 | } |
99 | } | 200 | } |
@@ -106,8 +207,8 @@ parse_advance(Parser *parser) { | |||
106 | } | 207 | } |
107 | 208 | ||
108 | static void | 209 | static void |
109 | parse_consume(Parser *parser, TokenType type, Str msg) { | 210 | parse_consume(Parser *parser, TokenKind kind, Str msg) { |
110 | if (parser->current.type == type) { | 211 | if (parser->current.kind == kind) { |
111 | parse_advance(parser); | 212 | parse_advance(parser); |
112 | return; | 213 | return; |
113 | } | 214 | } |
@@ -117,16 +218,16 @@ parse_consume(Parser *parser, TokenType type, Str msg) { | |||
117 | static void | 218 | static void |
118 | parse_prec(Parser *parser, ParsePrecedence precedence) { | 219 | parse_prec(Parser *parser, ParsePrecedence precedence) { |
119 | parse_advance(parser); | 220 | parse_advance(parser); |
120 | ParseFn prefix = parse_rules[parser->previous.type].prefix; | 221 | ParseFn prefix = parse_rules[parser->previous.kind].prefix; |
121 | if (prefix == NULL) { | 222 | if (prefix == NULL) { |
122 | parse_emit_err(parser, parser->previous, cstr("expected expression")); | 223 | parse_emit_err(parser, parser->previous, cstr("expected expression")); |
123 | return; | 224 | return; |
124 | } | 225 | } |
125 | prefix(parser); | 226 | prefix(parser); |
126 | 227 | ||
127 | while (precedence <= parse_rules[parser->current.type].precedence) { | 228 | while (precedence <= parse_rules[parser->current.kind].precedence) { |
128 | parse_advance(parser); | 229 | parse_advance(parser); |
129 | ParseFn infix = parse_rules[parser->previous.type].infix; | 230 | ParseFn infix = parse_rules[parser->previous.kind].infix; |
130 | if (infix == NULL) { | 231 | if (infix == NULL) { |
131 | parse_emit_err(parser, parser->previous, | 232 | parse_emit_err(parser, parser->previous, |
132 | cstr("expected expression")); | 233 | cstr("expected expression")); |
@@ -138,13 +239,15 @@ parse_prec(Parser *parser, ParsePrecedence precedence) { | |||
138 | 239 | ||
139 | void | 240 | void |
140 | parse_unary(Parser *parser) { | 241 | parse_unary(Parser *parser) { |
242 | #if DEBUG == 1 | ||
141 | print("parsing unary "); | 243 | print("parsing unary "); |
142 | print_token(parser->previous); | 244 | print_token(parser->previous); |
245 | #endif | ||
143 | parse_prec(parser, PREC_ASSIGNMENT); | 246 | parse_prec(parser, PREC_ASSIGNMENT); |
144 | TokenType type = parser->previous.type; | 247 | TokenKind kind = parser->previous.kind; |
145 | parse_expr(parser); | 248 | parse_expr(parser); |
146 | // TODO: ... | 249 | // TODO: ... |
147 | switch (type) { | 250 | switch (kind) { |
148 | // case TOKEN_MINUS: emitByte(OP_NEGATE); break; | 251 | // case TOKEN_MINUS: emitByte(OP_NEGATE); break; |
149 | default: | 252 | default: |
150 | return; // Unreachable. | 253 | return; // Unreachable. |
@@ -153,47 +256,113 @@ parse_unary(Parser *parser) { | |||
153 | 256 | ||
154 | void | 257 | void |
155 | parse_binary(Parser *parser) { | 258 | parse_binary(Parser *parser) { |
259 | #if DEBUG == 1 | ||
156 | print("parsing binary "); | 260 | print("parsing binary "); |
157 | print_token(parser->previous); | 261 | print_token(parser->previous); |
158 | TokenType operatorType = parser->previous.type; | 262 | #endif |
159 | ParseRule rule = parse_rules[operatorType]; | 263 | TokenKind kind = parser->previous.kind; |
264 | ParseRule rule = parse_rules[kind]; | ||
160 | parse_prec(parser, rule.precedence + 1); | 265 | parse_prec(parser, rule.precedence + 1); |
161 | 266 | ||
162 | // TODO: ... | 267 | Node *node; |
163 | // switch (operatorType) { | 268 | switch (kind) { |
164 | // case TOKEN_PLUS: emitByte(OP_ADD); break; | 269 | case TOK_ADD: { |
165 | // case TOKEN_MINUS: emitByte(OP_SUBTRACT); break; | 270 | node = node_alloc(NODE_ADD, parser->previous, parser->storage); |
166 | // case TOKEN_STAR: emitByte(OP_MULTIPLY); break; | 271 | } break; |
167 | // case TOKEN_SLASH: emitByte(OP_DIVIDE); break; | 272 | case TOK_SUB: { |
168 | // default: return; // Unreachable. | 273 | node = node_alloc(NODE_SUB, parser->previous, parser->storage); |
169 | // } | 274 | } break; |
275 | case TOK_MUL: { | ||
276 | node = node_alloc(NODE_MUL, parser->previous, parser->storage); | ||
277 | } break; | ||
278 | case TOK_DIV: { | ||
279 | node = node_alloc(NODE_DIV, parser->previous, parser->storage); | ||
280 | } break; | ||
281 | default: { | ||
282 | parse_emit_err(parser, parser->previous, cstr("unreachable")); | ||
283 | return; | ||
284 | } | ||
285 | } | ||
286 | node->right = array_pop(parser->nodes); | ||
287 | node->left = array_pop(parser->nodes); | ||
288 | array_push(parser->nodes, node, parser->storage); | ||
170 | } | 289 | } |
171 | 290 | ||
172 | void | 291 | void |
173 | parse_number(Parser *parser) { | 292 | parse_number(Parser *parser) { |
293 | #if DEBUG == 1 | ||
174 | print("parsing number "); | 294 | print("parsing number "); |
175 | print_token(parser->previous); | 295 | print_token(parser->previous); |
176 | // TODO: ... | 296 | #endif |
177 | // double value = strtod(parser.previous.start, NULL); | 297 | Node *node = node_alloc(NODE_NUMBER, parser->previous, parser->storage); |
178 | // emitConstant(value); | 298 | node->value.i = str_to_int(parser->previous.val); |
299 | // TODO: handle sign and/or floating point values. | ||
300 | array_push(parser->nodes, node, parser->storage); | ||
179 | } | 301 | } |
180 | 302 | ||
181 | void | 303 | void |
182 | parse_grouping(Parser *parser) { | 304 | parse_grouping(Parser *parser) { |
305 | #if DEBUG == 1 | ||
183 | print("parsing group "); | 306 | print("parsing group "); |
184 | print_token(parser->previous); | 307 | print_token(parser->previous); |
308 | #endif | ||
185 | parse_expr(parser); | 309 | parse_expr(parser); |
186 | parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); | 310 | parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); |
187 | } | 311 | } |
188 | 312 | ||
189 | void | 313 | void |
190 | parse_expr(Parser *parser) { | 314 | parse_expr(Parser *parser) { |
191 | // TODO: ... | ||
192 | // Can this be prec_none instead? | 315 | // Can this be prec_none instead? |
193 | parse_prec(parser, PREC_ASSIGNMENT); | 316 | parse_prec(parser, PREC_ASSIGNMENT); |
194 | } | 317 | } |
195 | 318 | ||
196 | void | 319 | void |
320 | graph_node(Node *node) { | ||
321 | if (node == NULL) { | ||
322 | return; | ||
323 | } | ||
324 | print("%d [width=2.5,shape=Mrecord,label=\"", node->id); | ||
325 | print("<top> %s ", node_str[node->kind]); | ||
326 | switch (node->kind) { | ||
327 | case NODE_NUMBER: { | ||
328 | print("| Value: %d", node->value.i); | ||
329 | } break; | ||
330 | default: | ||
331 | break; | ||
332 | } | ||
333 | print("| Line: %d | Col: %d ", node->line, node->col); | ||
334 | println("\"];"); | ||
335 | if (node->left) { | ||
336 | println("%d:top:e->%d:top:w;", node->id, node->left->id); | ||
337 | graph_node(node->left); | ||
338 | } | ||
339 | if (node->right) { | ||
340 | println("%d:top:e->%d:top:w;", node->id, node->right->id); | ||
341 | graph_node(node->right); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | void | ||
346 | graph_ast(Node **roots) { | ||
347 | if (roots == NULL) { | ||
348 | return; | ||
349 | } | ||
350 | println("digraph ast {"); | ||
351 | println("rankdir=LR;"); | ||
352 | println("ranksep=\"0.95 equally\";"); | ||
353 | println("nodesep=\"0.5 equally\";"); | ||
354 | println("overlap=scale;"); | ||
355 | println("bgcolor=\"transparent\";"); | ||
356 | for (sz i = 0; i < array_size(roots); ++i) { | ||
357 | println("subgraph %d {", i); | ||
358 | Node *root = roots[array_size(roots) - 1 - i]; | ||
359 | graph_node(root); | ||
360 | println("}"); | ||
361 | } | ||
362 | println("}"); | ||
363 | } | ||
364 | |||
365 | void | ||
197 | process_file(Str path) { | 366 | process_file(Str path) { |
198 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 367 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); |
199 | 368 | ||
@@ -208,11 +377,11 @@ process_file(Str path) { | |||
208 | Scanner scanner = {.str = file.data}; | 377 | Scanner scanner = {.str = file.data}; |
209 | Token *tokens = NULL; | 378 | Token *tokens = NULL; |
210 | Token tok = {0}; | 379 | Token tok = {0}; |
211 | while (tok.type != TOK_EOF) { | 380 | while (tok.kind != TOK_EOF) { |
212 | tok = scan_token(&scanner); | 381 | tok = scan_token(&scanner); |
213 | if (tok.type == TOK_UNKNOWN) { | 382 | if (tok.kind == TOK_UNKNOWN) { |
214 | eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, | 383 | eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, |
215 | token_str[tok.type], tok.val); | 384 | token_str[tok.kind], tok.val); |
216 | errors++; | 385 | errors++; |
217 | continue; | 386 | continue; |
218 | } | 387 | } |
@@ -225,11 +394,31 @@ process_file(Str path) { | |||
225 | } | 394 | } |
226 | 395 | ||
227 | // Parser. | 396 | // Parser. |
228 | Parser parser = {.tokens = tokens}; | 397 | Parser parser = { |
398 | .tokens = tokens, | ||
399 | .storage = &lexer_arena, | ||
400 | }; | ||
401 | array_init(parser.nodes, 256, parser.storage); | ||
229 | parse_advance(&parser); | 402 | parse_advance(&parser); |
230 | parse_expr(&parser); | 403 | while (parser.current.kind != TOK_EOF) { |
231 | parse_consume(&parser, TOK_EOF, cstr("expected end of file")); | 404 | parse_expr(&parser); |
405 | } | ||
406 | if (parser.err) { | ||
407 | goto stop; | ||
408 | } | ||
409 | if (mode == PRINT_PARSE) { | ||
410 | graph_ast(parser.nodes); | ||
411 | goto stop; | ||
412 | } | ||
232 | 413 | ||
414 | sz n_roots = array_size(parser.nodes); | ||
415 | for (sz i = 0; i < n_roots; i++) { | ||
416 | // The parser stores the root nodes as a stack. | ||
417 | Node *root = parser.nodes[i]; | ||
418 | print_node(root); println(""); | ||
419 | (void)root; | ||
420 | } | ||
421 | parse_consume(&parser, TOK_EOF, cstr("expected end of file")); | ||
233 | // print_tokens(path, tokens); // DEBUG | 422 | // print_tokens(path, tokens); // DEBUG |
234 | 423 | ||
235 | stop: | 424 | stop: |