diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-15 16:52:36 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-15 16:52:36 +0200 |
commit | e7cd0d47a603e4199b0ee7daa2434fc0db602bad (patch) | |
tree | 511cfbe2cea66e45b4ca7669ed9a101763ae3537 /src/main.c | |
parent | 893b52223d274c675272cee55768a9d5853420fb (diff) | |
download | bdl-e7cd0d47a603e4199b0ee7daa2434fc0db602bad.tar.gz bdl-e7cd0d47a603e4199b0ee7daa2434fc0db602bad.zip |
Move lexer code to lexer.c file
Diffstat (limited to 'src/main.c')
-rw-r--r-- | src/main.c | 631 |
1 files changed, 12 insertions, 619 deletions
@@ -3,6 +3,7 @@ | |||
3 | #include <stdlib.h> | 3 | #include <stdlib.h> |
4 | 4 | ||
5 | #include "badlib.h" | 5 | #include "badlib.h" |
6 | #include "lexer.c" | ||
6 | 7 | ||
7 | typedef enum ExecMode { | 8 | typedef enum ExecMode { |
8 | RUN_NORMAL, | 9 | RUN_NORMAL, |
@@ -14,607 +15,11 @@ typedef enum ExecMode { | |||
14 | 15 | ||
15 | static ExecMode mode = RUN_NORMAL; | 16 | static ExecMode mode = RUN_NORMAL; |
16 | 17 | ||
17 | #define LEXER_MEM GB(2) | ||
18 | |||
19 | void | 18 | void |
20 | init(void) { | 19 | init(void) { |
21 | log_init_default(); | 20 | log_init_default(); |
22 | } | 21 | } |
23 | 22 | ||
24 | typedef enum TokenType { | ||
25 | TOK_UNKNOWN = 0, | ||
26 | |||
27 | // Parentheses. | ||
28 | TOK_LPAREN, // ( | ||
29 | TOK_RPAREN, // ) | ||
30 | TOK_LSQUARE, // [ | ||
31 | TOK_RSQUARE, // ] | ||
32 | TOK_LCURLY, // { | ||
33 | TOK_RCURLY, // } | ||
34 | |||
35 | // Basic literals. | ||
36 | TOK_NUMBER, | ||
37 | TOK_SYMBOL, | ||
38 | TOK_STRING, | ||
39 | |||
40 | // Keywords. | ||
41 | TOK_BREAK, // break | ||
42 | TOK_CASE, // case | ||
43 | TOK_CONTINUE, // continue | ||
44 | TOK_FALSE, // false | ||
45 | TOK_FUN, // fun | ||
46 | TOK_IF, // if | ||
47 | TOK_LET, // let | ||
48 | TOK_MATCH, // match | ||
49 | TOK_NIL, // nil | ||
50 | TOK_RETURN, // return | ||
51 | TOK_SET, // set | ||
52 | TOK_STRUCT, // struct | ||
53 | TOK_TRUE, // true | ||
54 | TOK_WHILE, // while | ||
55 | |||
56 | // Arithmetic ops. | ||
57 | TOK_ADD, // + | ||
58 | TOK_SUB, // - | ||
59 | TOK_MUL, // * | ||
60 | TOK_DIV, // / | ||
61 | TOK_MOD, // % | ||
62 | |||
63 | // Logical ops. | ||
64 | TOK_NOT, // ! | ||
65 | TOK_AND, // && | ||
66 | TOK_OR, // || | ||
67 | TOK_EQ, // == | ||
68 | TOK_NOTEQ, // != | ||
69 | TOK_LT, // < | ||
70 | TOK_GT, // > | ||
71 | TOK_LE, // <= | ||
72 | TOK_GE, // >= | ||
73 | |||
74 | // Bitwise ops. | ||
75 | TOK_BITNOT, // ~ | ||
76 | TOK_BITAND, // & | ||
77 | TOK_BITOR, // | | ||
78 | TOK_BITLSHIFT, // << | ||
79 | TOK_BITRSHIFT, // >> | ||
80 | |||
81 | // Special ops. | ||
82 | TOK_COLON, // : | ||
83 | TOK_DOT, // . | ||
84 | TOK_AT, // @ | ||
85 | TOK_ASSIGN, // = | ||
86 | |||
87 | // End of file. | ||
88 | TOK_EOF, | ||
89 | } TokenType; | ||
90 | |||
91 | Str token_str[] = { | ||
92 | [TOK_UNKNOWN] = cstr("UNKNOWN"), | ||
93 | |||
94 | // Parentheses. | ||
95 | [TOK_LPAREN] = cstr("LPAREN"), | ||
96 | [TOK_RPAREN] = cstr("RPAREN"), | ||
97 | [TOK_LSQUARE] = cstr("LSQUARE"), | ||
98 | [TOK_RSQUARE] = cstr("RSQUARE"), | ||
99 | [TOK_LCURLY] = cstr("LCURLY"), | ||
100 | [TOK_RCURLY] = cstr("RCURLY"), | ||
101 | |||
102 | // Basic literals. | ||
103 | [TOK_NUMBER] = cstr("NUMBER"), | ||
104 | [TOK_SYMBOL] = cstr("SYMBOL"), | ||
105 | [TOK_STRING] = cstr("STRING"), | ||
106 | |||
107 | // Keywords. | ||
108 | [TOK_BREAK] = cstr("BREAK"), | ||
109 | [TOK_CASE] = cstr("CASE"), | ||
110 | [TOK_CONTINUE] = cstr("CONTINUE"), | ||
111 | [TOK_FALSE] = cstr("FALSE"), | ||
112 | [TOK_FUN] = cstr("FUN"), | ||
113 | [TOK_IF] = cstr("IF"), | ||
114 | [TOK_LET] = cstr("LET"), | ||
115 | [TOK_MATCH] = cstr("MATCH"), | ||
116 | [TOK_NIL] = cstr("NIL"), | ||
117 | [TOK_RETURN] = cstr("RETURN"), | ||
118 | [TOK_SET] = cstr("SET"), | ||
119 | [TOK_STRUCT] = cstr("STRUCT"), | ||
120 | [TOK_TRUE] = cstr("TRUE"), | ||
121 | [TOK_WHILE] = cstr("WHILE"), | ||
122 | |||
123 | // Arithmetic ops. | ||
124 | [TOK_ADD] = cstr("ADD"), | ||
125 | [TOK_SUB] = cstr("SUB"), | ||
126 | [TOK_MUL] = cstr("MUL"), | ||
127 | [TOK_DIV] = cstr("DIV"), | ||
128 | [TOK_MOD] = cstr("MOD"), | ||
129 | |||
130 | // Logical ops. | ||
131 | [TOK_NOT] = cstr("NOT"), | ||
132 | [TOK_AND] = cstr("AND"), | ||
133 | [TOK_OR] = cstr("OR"), | ||
134 | [TOK_EQ] = cstr("EQ"), | ||
135 | [TOK_NOTEQ] = cstr("NOTEQ"), | ||
136 | [TOK_LT] = cstr("LT"), | ||
137 | [TOK_GT] = cstr("GT"), | ||
138 | [TOK_LE] = cstr("LE"), | ||
139 | [TOK_GE] = cstr("GE"), | ||
140 | |||
141 | // Bitwise ops. | ||
142 | [TOK_BITNOT] = cstr("BITNOT"), | ||
143 | [TOK_BITAND] = cstr("BITAND"), | ||
144 | [TOK_BITOR] = cstr("BITOR"), | ||
145 | [TOK_BITLSHIFT] = cstr("BITLSHIFT"), | ||
146 | [TOK_BITRSHIFT] = cstr("BITRSHIFT"), | ||
147 | |||
148 | // Special ops. | ||
149 | [TOK_COLON] = cstr("COLON"), | ||
150 | [TOK_DOT] = cstr("DOT"), | ||
151 | [TOK_AT] = cstr("AT"), | ||
152 | [TOK_ASSIGN] = cstr("ASSIGN"), | ||
153 | |||
154 | // End of file. | ||
155 | [TOK_EOF] = cstr("EOF"), | ||
156 | }; | ||
157 | |||
158 | typedef struct Token { | ||
159 | TokenType type; | ||
160 | Str val; | ||
161 | sz line; | ||
162 | sz col; | ||
163 | } Token; | ||
164 | |||
165 | typedef struct Scanner { | ||
166 | Str str; | ||
167 | sz line; | ||
168 | sz col; | ||
169 | Arena *storage; | ||
170 | } Scanner; | ||
171 | |||
172 | char | ||
173 | scan_next(Scanner *scanner) { | ||
174 | char c = str_next(&scanner->str); | ||
175 | if (c == '\n') { | ||
176 | scanner->line++; | ||
177 | scanner->col = 0; | ||
178 | } else { | ||
179 | scanner->col++; | ||
180 | } | ||
181 | return c; | ||
182 | } | ||
183 | |||
184 | bool | ||
185 | scan_has_next(Scanner *scanner) { | ||
186 | return scanner->str.size; | ||
187 | } | ||
188 | |||
189 | char | ||
190 | scan_peek(Scanner *scanner) { | ||
191 | return str_peek(scanner->str); | ||
192 | } | ||
193 | |||
194 | Token | ||
195 | emit_token(Scanner current, Scanner *scanner, TokenType t) { | ||
196 | Str val = current.str; | ||
197 | val.size = current.str.size - scanner->str.size; | ||
198 | val.size = val.size < 0 ? 0 : val.size; | ||
199 | return (Token){ | ||
200 | .val = val, | ||
201 | .line = current.line + 1, | ||
202 | .col = current.col + 1, | ||
203 | .type = t, | ||
204 | }; | ||
205 | } | ||
206 | |||
207 | Token | ||
208 | emit_token_err(Scanner *scanner, Str err_msg) { | ||
209 | return (Token){ | ||
210 | .line = scanner->line + 1, | ||
211 | .col = scanner->col + 1, | ||
212 | .val = err_msg, | ||
213 | .type = TOK_UNKNOWN, | ||
214 | }; | ||
215 | } | ||
216 | |||
217 | void | ||
218 | scan_skip_line(Scanner *scanner) { | ||
219 | SearchResult newline = array_find_next(scanner->str, cstr("\n")); | ||
220 | if (newline.found) { | ||
221 | scanner->str.mem += newline.pos + 1; | ||
222 | scanner->str.size -= newline.pos + 1; | ||
223 | scanner->line++; | ||
224 | scanner->col = 0; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | void | ||
229 | scan_skip_whitespace(Scanner *scanner) { | ||
230 | while (scan_has_next(scanner)) { | ||
231 | char c = scan_peek(scanner); | ||
232 | switch (c) { | ||
233 | case ' ': | ||
234 | case ',': // Commas are just syntactic sugar. | ||
235 | case '\f': | ||
236 | case '\n': | ||
237 | case '\r': | ||
238 | case '\t': | ||
239 | case '\v': { | ||
240 | scan_next(scanner); | ||
241 | } break; | ||
242 | case ';': { | ||
243 | // Found a comment! (skip) | ||
244 | scan_skip_line(scanner); | ||
245 | } break; | ||
246 | default: { | ||
247 | return; | ||
248 | } break; | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | |||
253 | bool | ||
254 | is_valid_split(char c) { | ||
255 | switch (c) { | ||
256 | case ';': | ||
257 | case '(': | ||
258 | case ')': | ||
259 | case '[': | ||
260 | case ']': | ||
261 | case '{': | ||
262 | case '}': | ||
263 | case '+': | ||
264 | case '-': | ||
265 | case '*': | ||
266 | case '/': | ||
267 | case '%': | ||
268 | case '!': | ||
269 | case '=': | ||
270 | case '<': | ||
271 | case '>': | ||
272 | case '~': | ||
273 | case '&': | ||
274 | case '|': | ||
275 | case ':': | ||
276 | case '.': | ||
277 | case '@': | ||
278 | case '"': | ||
279 | case ' ': | ||
280 | case ',': | ||
281 | case '\f': | ||
282 | case '\n': | ||
283 | case '\r': | ||
284 | case '\t': | ||
285 | case '\v': { | ||
286 | return true; | ||
287 | } break; | ||
288 | } | ||
289 | return false; | ||
290 | } | ||
291 | |||
292 | void | ||
293 | scan_skip_until_valid(Scanner *scanner) { | ||
294 | while (scan_has_next(scanner)) { | ||
295 | char c = scan_peek(scanner); | ||
296 | if (is_valid_split(c)) { | ||
297 | return; | ||
298 | } | ||
299 | scan_next(scanner); | ||
300 | } | ||
301 | } | ||
302 | |||
303 | Token | ||
304 | emit_token_number(Scanner *scanner) { | ||
305 | Scanner current = *scanner; | ||
306 | char c = scan_peek(scanner); | ||
307 | if (c == '+' || c == '-') { | ||
308 | scan_next(scanner); | ||
309 | if (str_has_prefix(scanner->str, cstr("0b")) || | ||
310 | str_has_prefix(scanner->str, cstr("0x"))) { | ||
311 | scan_skip_until_valid(scanner); | ||
312 | return emit_token_err( | ||
313 | &current, | ||
314 | cstr("malformed number: binary/hex numbers can't be signed")); | ||
315 | } | ||
316 | } | ||
317 | if (str_has_prefix(scanner->str, cstr("0b"))) { | ||
318 | scan_next(scanner); | ||
319 | scan_next(scanner); | ||
320 | while (scan_has_next(scanner)) { | ||
321 | c = scan_peek(scanner); | ||
322 | if (c == '0' || c == '1' || c == '_') { | ||
323 | scan_next(scanner); | ||
324 | continue; | ||
325 | } | ||
326 | if (is_valid_split(c)) { | ||
327 | return emit_token(current, scanner, TOK_NUMBER); | ||
328 | } | ||
329 | scan_skip_until_valid(scanner); | ||
330 | return emit_token_err( | ||
331 | &current, cstr("malformed number: invalid binary number")); | ||
332 | } | ||
333 | } else if (str_has_prefix(scanner->str, cstr("0x"))) { | ||
334 | scan_next(scanner); | ||
335 | scan_next(scanner); | ||
336 | while (scan_has_next(scanner)) { | ||
337 | c = scan_peek(scanner); | ||
338 | if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || | ||
339 | (c >= 'A' && c <= 'F') || c == '_') { | ||
340 | scan_next(scanner); | ||
341 | continue; | ||
342 | } | ||
343 | if (is_valid_split(c)) { | ||
344 | return emit_token(current, scanner, TOK_NUMBER); | ||
345 | } | ||
346 | scan_skip_until_valid(scanner); | ||
347 | return emit_token_err(&current, | ||
348 | cstr("malformed number: invalid hex number")); | ||
349 | } | ||
350 | } else { | ||
351 | // Integral. | ||
352 | while (scan_has_next(scanner)) { | ||
353 | c = scan_peek(scanner); | ||
354 | if (c == '.') { | ||
355 | scan_next(scanner); | ||
356 | break; | ||
357 | } | ||
358 | if ((c >= '0' && c <= '9') || c == '_') { | ||
359 | scan_next(scanner); | ||
360 | continue; | ||
361 | } | ||
362 | if (is_valid_split(c)) { | ||
363 | return emit_token(current, scanner, TOK_NUMBER); | ||
364 | } | ||
365 | scan_skip_until_valid(scanner); | ||
366 | return emit_token_err(&current, cstr("malformed number")); | ||
367 | } | ||
368 | c = scan_peek(scanner); | ||
369 | if (!(c >= '0' && c <= '9')) { | ||
370 | return emit_token_err(&current, | ||
371 | cstr("malformed number: no decimal digits")); | ||
372 | } | ||
373 | // Decimals. | ||
374 | while (scan_has_next(scanner)) { | ||
375 | c = scan_peek(scanner); | ||
376 | if (c == 'e' || c == 'E') { | ||
377 | scan_next(scanner); | ||
378 | break; | ||
379 | } | ||
380 | if ((c >= '0' && c <= '9') || c == '_') { | ||
381 | scan_next(scanner); | ||
382 | continue; | ||
383 | } | ||
384 | if (is_valid_split(c)) { | ||
385 | return emit_token(current, scanner, TOK_NUMBER); | ||
386 | } | ||
387 | scan_skip_until_valid(scanner); | ||
388 | return emit_token_err(&current, cstr("malformed number")); | ||
389 | } | ||
390 | // Exponent. | ||
391 | c = scan_peek(scanner); | ||
392 | if (c == '+' || c == '-') { | ||
393 | scan_next(scanner); | ||
394 | } | ||
395 | while (scan_has_next(scanner)) { | ||
396 | c = scan_peek(scanner); | ||
397 | if ((c >= '0' && c <= '9') || c == '_') { | ||
398 | scan_next(scanner); | ||
399 | continue; | ||
400 | } | ||
401 | if (c == '.') { | ||
402 | scan_next(scanner); | ||
403 | return emit_token_err( | ||
404 | &current, | ||
405 | cstr("malformed number: decimals not allowed on exponent")); | ||
406 | } | ||
407 | if (is_valid_split(c)) { | ||
408 | return emit_token(current, scanner, TOK_NUMBER); | ||
409 | } | ||
410 | scan_skip_until_valid(scanner); | ||
411 | return emit_token_err(&current, cstr("malformed number")); | ||
412 | } | ||
413 | } | ||
414 | return emit_token_err(&current, cstr("malformed number")); | ||
415 | } | ||
416 | |||
417 | Token | ||
418 | scan_token(Scanner *scanner) { | ||
419 | assert(scanner); | ||
420 | |||
421 | scan_skip_whitespace(scanner); | ||
422 | if (!scan_has_next(scanner)) { | ||
423 | return emit_token(*scanner, scanner, TOK_EOF); | ||
424 | } | ||
425 | |||
426 | Scanner current = *scanner; | ||
427 | char c = scan_next(scanner); | ||
428 | switch (c) { | ||
429 | case '(': | ||
430 | return emit_token(current, scanner, TOK_LPAREN); | ||
431 | case ')': | ||
432 | return emit_token(current, scanner, TOK_RPAREN); | ||
433 | case '[': | ||
434 | return emit_token(current, scanner, TOK_LSQUARE); | ||
435 | case ']': | ||
436 | return emit_token(current, scanner, TOK_RSQUARE); | ||
437 | case '{': | ||
438 | return emit_token(current, scanner, TOK_LCURLY); | ||
439 | case '}': | ||
440 | return emit_token(current, scanner, TOK_RCURLY); | ||
441 | case '+': { | ||
442 | char p = scan_peek(scanner); | ||
443 | if (p >= '0' && p <= '9') { | ||
444 | *scanner = current; | ||
445 | return emit_token_number(scanner); | ||
446 | } | ||
447 | return emit_token(current, scanner, TOK_ADD); | ||
448 | }; | ||
449 | case '-': { | ||
450 | char p = scan_peek(scanner); | ||
451 | if (p >= '0' && p <= '9') { | ||
452 | *scanner = current; | ||
453 | return emit_token_number(scanner); | ||
454 | } | ||
455 | return emit_token(current, scanner, TOK_ADD); | ||
456 | }; | ||
457 | case '*': | ||
458 | return emit_token(current, scanner, TOK_MUL); | ||
459 | case '/': | ||
460 | return emit_token(current, scanner, TOK_DIV); | ||
461 | case '%': | ||
462 | return emit_token(current, scanner, TOK_MOD); | ||
463 | case '!': { | ||
464 | if (scan_peek(scanner) == '=') { | ||
465 | scan_next(scanner); | ||
466 | return emit_token(current, scanner, TOK_NOTEQ); | ||
467 | } | ||
468 | return emit_token(current, scanner, TOK_NOT); | ||
469 | }; | ||
470 | case '=': { | ||
471 | if (scan_peek(scanner) == '=') { | ||
472 | scan_next(scanner); | ||
473 | return emit_token(current, scanner, TOK_EQ); | ||
474 | } | ||
475 | return emit_token(current, scanner, TOK_ASSIGN); | ||
476 | }; | ||
477 | case '<': { | ||
478 | char p = scan_peek(scanner); | ||
479 | if (p == '=') { | ||
480 | scan_next(scanner); | ||
481 | return emit_token(current, scanner, TOK_LE); | ||
482 | } | ||
483 | if (p == '<') { | ||
484 | scan_next(scanner); | ||
485 | return emit_token(current, scanner, TOK_BITLSHIFT); | ||
486 | } | ||
487 | return emit_token(current, scanner, TOK_LT); | ||
488 | }; | ||
489 | case '>': { | ||
490 | char p = scan_peek(scanner); | ||
491 | if (p == '=') { | ||
492 | scan_next(scanner); | ||
493 | return emit_token(current, scanner, TOK_GE); | ||
494 | } | ||
495 | if (p == '>') { | ||
496 | scan_next(scanner); | ||
497 | return emit_token(current, scanner, TOK_BITRSHIFT); | ||
498 | } | ||
499 | return emit_token(current, scanner, TOK_GT); | ||
500 | }; | ||
501 | case '~': | ||
502 | return emit_token(current, scanner, TOK_BITNOT); | ||
503 | case '&': { | ||
504 | if (scan_peek(scanner) == '&') { | ||
505 | scan_next(scanner); | ||
506 | return emit_token(current, scanner, TOK_AND); | ||
507 | } | ||
508 | return emit_token(current, scanner, TOK_BITAND); | ||
509 | }; | ||
510 | case '|': { | ||
511 | if (scan_peek(scanner) == '|') { | ||
512 | scan_next(scanner); | ||
513 | return emit_token(current, scanner, TOK_OR); | ||
514 | } | ||
515 | return emit_token(current, scanner, TOK_BITOR); | ||
516 | }; | ||
517 | case ':': | ||
518 | return emit_token(current, scanner, TOK_COLON); | ||
519 | case '.': | ||
520 | return emit_token(current, scanner, TOK_DOT); | ||
521 | case '@': | ||
522 | return emit_token(current, scanner, TOK_AT); | ||
523 | case '"': { | ||
524 | while (scan_has_next(scanner)) { | ||
525 | c = scan_next(scanner); | ||
526 | if (c == '\\') { | ||
527 | scan_next(scanner); | ||
528 | continue; | ||
529 | } | ||
530 | if (c == '"') { | ||
531 | return emit_token(current, scanner, TOK_STRING); | ||
532 | } | ||
533 | } | ||
534 | return emit_token_err(&current, cstr("mismatched string quotes")); | ||
535 | }; | ||
536 | } | ||
537 | if (c >= '0' && c <= '9') { | ||
538 | *scanner = current; | ||
539 | return emit_token_number(scanner); | ||
540 | } | ||
541 | |||
542 | scan_skip_until_valid(scanner); | ||
543 | Str val = current.str; | ||
544 | val.size = current.str.size - scanner->str.size; | ||
545 | val.size = val.size < 0 ? 0 : val.size; | ||
546 | if (val.size == 0) { | ||
547 | return emit_token_err(&current, cstr("unexpected character")); | ||
548 | } | ||
549 | switch (val.mem[0]) { | ||
550 | case 'b': { | ||
551 | if (str_has_prefix(val, cstr("break"))) { | ||
552 | return emit_token(current, scanner, TOK_BREAK); | ||
553 | } | ||
554 | } break; | ||
555 | case 'c': { | ||
556 | if (str_has_prefix(val, cstr("case"))) { | ||
557 | return emit_token(current, scanner, TOK_CASE); | ||
558 | } | ||
559 | if (str_has_prefix(val, cstr("continue"))) { | ||
560 | return emit_token(current, scanner, TOK_CONTINUE); | ||
561 | } | ||
562 | } break; | ||
563 | case 'f': { | ||
564 | if (str_has_prefix(val, cstr("false"))) { | ||
565 | return emit_token(current, scanner, TOK_FALSE); | ||
566 | } | ||
567 | if (str_has_prefix(val, cstr("fun"))) { | ||
568 | return emit_token(current, scanner, TOK_FUN); | ||
569 | } | ||
570 | } break; | ||
571 | case 'i': { | ||
572 | if (str_has_prefix(val, cstr("if"))) { | ||
573 | return emit_token(current, scanner, TOK_IF); | ||
574 | } | ||
575 | } break; | ||
576 | case 'l': { | ||
577 | if (str_has_prefix(val, cstr("let"))) { | ||
578 | return emit_token(current, scanner, TOK_LET); | ||
579 | } | ||
580 | } break; | ||
581 | case 'm': { | ||
582 | if (str_has_prefix(val, cstr("match"))) { | ||
583 | return emit_token(current, scanner, TOK_MATCH); | ||
584 | } | ||
585 | } break; | ||
586 | case 'n': { | ||
587 | if (str_has_prefix(val, cstr("nil"))) { | ||
588 | return emit_token(current, scanner, TOK_NIL); | ||
589 | } | ||
590 | } break; | ||
591 | case 'r': { | ||
592 | if (str_has_prefix(val, cstr("return"))) { | ||
593 | return emit_token(current, scanner, TOK_RETURN); | ||
594 | } | ||
595 | } break; | ||
596 | case 's': { | ||
597 | if (str_has_prefix(val, cstr("set"))) { | ||
598 | return emit_token(current, scanner, TOK_SET); | ||
599 | } | ||
600 | if (str_has_prefix(val, cstr("struct"))) { | ||
601 | return emit_token(current, scanner, TOK_STRUCT); | ||
602 | } | ||
603 | } break; | ||
604 | case 't': { | ||
605 | if (str_has_prefix(val, cstr("true"))) { | ||
606 | return emit_token(current, scanner, TOK_TRUE); | ||
607 | } | ||
608 | } break; | ||
609 | case 'w': { | ||
610 | if (str_has_prefix(val, cstr("while"))) { | ||
611 | return emit_token(current, scanner, TOK_WHILE); | ||
612 | } | ||
613 | } break; | ||
614 | } | ||
615 | return emit_token(current, scanner, TOK_SYMBOL); | ||
616 | } | ||
617 | |||
618 | void | 23 | void |
619 | process_file(Str path) { | 24 | process_file(Str path) { |
620 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 25 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); |
@@ -628,36 +33,24 @@ process_file(Str path) { | |||
628 | 33 | ||
629 | Scanner scanner = { | 34 | Scanner scanner = { |
630 | .str = file.data, | 35 | .str = file.data, |
631 | .storage = &lexer_arena, | ||
632 | }; | 36 | }; |
633 | Token tok = {0}; | 37 | Token tok = {0}; |
38 | sz errors = 0; | ||
634 | while (tok.type != TOK_EOF) { | 39 | while (tok.type != TOK_EOF) { |
635 | tok = scan_token(&scanner); | 40 | tok = scan_token(&scanner); |
636 | eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type], | 41 | if (tok.type == TOK_UNKNOWN) { |
637 | tok.val); | 42 | eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, |
43 | token_str[tok.type], tok.val); | ||
44 | errors++; | ||
45 | } | ||
638 | } | 46 | } |
639 | // while (true) { | ||
640 | // Token tok = scan_token(&scanner); | ||
641 | // println("%s:%d:%d:%s %s", path, tok.line, tok.col, | ||
642 | // token_str[tok.type], | ||
643 | // tok.val); | ||
644 | // if (tok.type == TOK_EOF) break; | ||
645 | // } | ||
646 | 47 | ||
647 | // Str scanner = file.data; | 48 | // Only proceed if there are no errors. |
648 | // // NOTE: Testing file read line by line. | 49 | if (errors) { |
649 | // for (sz i = 0; scanner.size != 0; i++) { | 50 | goto stop; |
650 | // Str line = str_split(&scanner, cstr("\n")); | 51 | } |
651 | // println("%x{4} %s", i + 1, line); | ||
652 | // } | ||
653 | |||
654 | // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345, | ||
655 | // cstr("BOOM!"), &logger_inf.storage); | ||
656 | 52 | ||
657 | // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); | 53 | stop: |
658 | // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string | ||
659 | // logger"), 'X'); while (true) {} | ||
660 | // TODO: run lexer. | ||
661 | // Free up resources. | 54 | // Free up resources. |
662 | arena_destroy(&lexer_arena, os_allocator); | 55 | arena_destroy(&lexer_arena, os_allocator); |
663 | } | 56 | } |