aboutsummaryrefslogtreecommitdiffstats
path: root/src/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.c')
-rw-r--r--src/main.c631
1 files changed, 12 insertions, 619 deletions
diff --git a/src/main.c b/src/main.c
index edd70aa..9848b8b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
3#include <stdlib.h> 3#include <stdlib.h>
4 4
5#include "badlib.h" 5#include "badlib.h"
6#include "lexer.c"
6 7
7typedef enum ExecMode { 8typedef enum ExecMode {
8 RUN_NORMAL, 9 RUN_NORMAL,
@@ -14,607 +15,11 @@ typedef enum ExecMode {
14 15
15static ExecMode mode = RUN_NORMAL; 16static ExecMode mode = RUN_NORMAL;
16 17
17#define LEXER_MEM GB(2)
18
// Program start-up hook: it delegates to log_init_default() and does nothing else.
19void 18void
20init(void) { 19init(void) {
21 log_init_default(); 20 log_init_default();
22} 21}
23 22
// Every category of token the lexer can hand out.
//
// TOK_UNKNOWN is pinned to zero, so a zero-initialised Token carries it by
// default; emit_token_err() also uses it to tag error tokens.
typedef enum TokenType {
    TOK_UNKNOWN = 0,

    // Grouping delimiters.
    TOK_LPAREN,   // (
    TOK_RPAREN,   // )
    TOK_LSQUARE,  // [
    TOK_RSQUARE,  // ]
    TOK_LCURLY,   // {
    TOK_RCURLY,   // }

    // Literals and identifiers.
    TOK_NUMBER,
    TOK_SYMBOL,
    TOK_STRING,

    // Reserved words.
    TOK_BREAK,     // break
    TOK_CASE,      // case
    TOK_CONTINUE,  // continue
    TOK_FALSE,     // false
    TOK_FUN,       // fun
    TOK_IF,        // if
    TOK_LET,       // let
    TOK_MATCH,     // match
    TOK_NIL,       // nil
    TOK_RETURN,    // return
    TOK_SET,       // set
    TOK_STRUCT,    // struct
    TOK_TRUE,      // true
    TOK_WHILE,     // while

    // Arithmetic operators.
    TOK_ADD,  // +
    TOK_SUB,  // -
    TOK_MUL,  // *
    TOK_DIV,  // /
    TOK_MOD,  // %

    // Logical and comparison operators.
    TOK_NOT,    // !
    TOK_AND,    // &&
    TOK_OR,     // ||
    TOK_EQ,     // ==
    TOK_NOTEQ,  // !=
    TOK_LT,     // <
    TOK_GT,     // >
    TOK_LE,     // <=
    TOK_GE,     // >=

    // Bitwise operators.
    TOK_BITNOT,     // ~
    TOK_BITAND,     // &
    TOK_BITOR,      // |
    TOK_BITLSHIFT,  // <<
    TOK_BITRSHIFT,  // >>

    // Miscellaneous punctuation.
    TOK_COLON,   // :
    TOK_DOT,     // .
    TOK_AT,      // @
    TOK_ASSIGN,  // =

    // Marks the end of the input.
    TOK_EOF,
} TokenType;
90
91Str token_str[] = {
92 [TOK_UNKNOWN] = cstr("UNKNOWN"),
93
94 // Parentheses.
95 [TOK_LPAREN] = cstr("LPAREN"),
96 [TOK_RPAREN] = cstr("RPAREN"),
97 [TOK_LSQUARE] = cstr("LSQUARE"),
98 [TOK_RSQUARE] = cstr("RSQUARE"),
99 [TOK_LCURLY] = cstr("LCURLY"),
100 [TOK_RCURLY] = cstr("RCURLY"),
101
102 // Basic literals.
103 [TOK_NUMBER] = cstr("NUMBER"),
104 [TOK_SYMBOL] = cstr("SYMBOL"),
105 [TOK_STRING] = cstr("STRING"),
106
107 // Keywords.
108 [TOK_BREAK] = cstr("BREAK"),
109 [TOK_CASE] = cstr("CASE"),
110 [TOK_CONTINUE] = cstr("CONTINUE"),
111 [TOK_FALSE] = cstr("FALSE"),
112 [TOK_FUN] = cstr("FUN"),
113 [TOK_IF] = cstr("IF"),
114 [TOK_LET] = cstr("LET"),
115 [TOK_MATCH] = cstr("MATCH"),
116 [TOK_NIL] = cstr("NIL"),
117 [TOK_RETURN] = cstr("RETURN"),
118 [TOK_SET] = cstr("SET"),
119 [TOK_STRUCT] = cstr("STRUCT"),
120 [TOK_TRUE] = cstr("TRUE"),
121 [TOK_WHILE] = cstr("WHILE"),
122
123 // Arithmetic ops.
124 [TOK_ADD] = cstr("ADD"),
125 [TOK_SUB] = cstr("SUB"),
126 [TOK_MUL] = cstr("MUL"),
127 [TOK_DIV] = cstr("DIV"),
128 [TOK_MOD] = cstr("MOD"),
129
130 // Logical ops.
131 [TOK_NOT] = cstr("NOT"),
132 [TOK_AND] = cstr("AND"),
133 [TOK_OR] = cstr("OR"),
134 [TOK_EQ] = cstr("EQ"),
135 [TOK_NOTEQ] = cstr("NOTEQ"),
136 [TOK_LT] = cstr("LT"),
137 [TOK_GT] = cstr("GT"),
138 [TOK_LE] = cstr("LE"),
139 [TOK_GE] = cstr("GE"),
140
141 // Bitwise ops.
142 [TOK_BITNOT] = cstr("BITNOT"),
143 [TOK_BITAND] = cstr("BITAND"),
144 [TOK_BITOR] = cstr("BITOR"),
145 [TOK_BITLSHIFT] = cstr("BITLSHIFT"),
146 [TOK_BITRSHIFT] = cstr("BITRSHIFT"),
147
148 // Special ops.
149 [TOK_COLON] = cstr("COLON"),
150 [TOK_DOT] = cstr("DOT"),
151 [TOK_AT] = cstr("AT"),
152 [TOK_ASSIGN] = cstr("ASSIGN"),
153
154 // End of file.
155 [TOK_EOF] = cstr("EOF"),
156};
157
158typedef struct Token {
159 TokenType type;
160 Str val;
161 sz line;
162 sz col;
163} Token;
164
165typedef struct Scanner {
166 Str str;
167 sz line;
168 sz col;
169 Arena *storage;
170} Scanner;
171
172char
173scan_next(Scanner *scanner) {
174 char c = str_next(&scanner->str);
175 if (c == '\n') {
176 scanner->line++;
177 scanner->col = 0;
178 } else {
179 scanner->col++;
180 }
181 return c;
182}
183
184bool
185scan_has_next(Scanner *scanner) {
186 return scanner->str.size;
187}
188
189char
190scan_peek(Scanner *scanner) {
191 return str_peek(scanner->str);
192}
193
194Token
195emit_token(Scanner current, Scanner *scanner, TokenType t) {
196 Str val = current.str;
197 val.size = current.str.size - scanner->str.size;
198 val.size = val.size < 0 ? 0 : val.size;
199 return (Token){
200 .val = val,
201 .line = current.line + 1,
202 .col = current.col + 1,
203 .type = t,
204 };
205}
206
207Token
208emit_token_err(Scanner *scanner, Str err_msg) {
209 return (Token){
210 .line = scanner->line + 1,
211 .col = scanner->col + 1,
212 .val = err_msg,
213 .type = TOK_UNKNOWN,
214 };
215}
216
217void
218scan_skip_line(Scanner *scanner) {
219 SearchResult newline = array_find_next(scanner->str, cstr("\n"));
220 if (newline.found) {
221 scanner->str.mem += newline.pos + 1;
222 scanner->str.size -= newline.pos + 1;
223 scanner->line++;
224 scanner->col = 0;
225 }
226}
227
228void
229scan_skip_whitespace(Scanner *scanner) {
230 while (scan_has_next(scanner)) {
231 char c = scan_peek(scanner);
232 switch (c) {
233 case ' ':
234 case ',': // Commas are just syntactic sugar.
235 case '\f':
236 case '\n':
237 case '\r':
238 case '\t':
239 case '\v': {
240 scan_next(scanner);
241 } break;
242 case ';': {
243 // Found a comment! (skip)
244 scan_skip_line(scanner);
245 } break;
246 default: {
247 return;
248 } break;
249 }
250 }
251}
252
// Tells whether `c` may legally follow (and therefore terminate) a number,
// symbol or keyword: punctuation, operators, the string quote, the comment
// marker, commas and whitespace. Anything else, including '\0', is not a
// split character.
bool
is_valid_split(char c) {
    static const char delimiters[] = {
        ';', '(', ')', '[', ']', '{', '}',    // comment marker and brackets
        '+', '-', '*', '/', '%',              // arithmetic
        '!', '=', '<', '>', '~', '&', '|',    // logic, comparison, bitwise
        ':', '.', '@', '"',                   // special ops and string quote
        ' ', ',', '\f', '\n', '\r', '\t', '\v',  // blanks and commas
    };
    for (unsigned i = 0; i < sizeof(delimiters) / sizeof(delimiters[0]); i++) {
        if (delimiters[i] == c) {
            return true;
        }
    }
    return false;
}
291
292void
293scan_skip_until_valid(Scanner *scanner) {
294 while (scan_has_next(scanner)) {
295 char c = scan_peek(scanner);
296 if (is_valid_split(c)) {
297 return;
298 }
299 scan_next(scanner);
300 }
301}
302
303Token
304emit_token_number(Scanner *scanner) {
305 Scanner current = *scanner;
306 char c = scan_peek(scanner);
307 if (c == '+' || c == '-') {
308 scan_next(scanner);
309 if (str_has_prefix(scanner->str, cstr("0b")) ||
310 str_has_prefix(scanner->str, cstr("0x"))) {
311 scan_skip_until_valid(scanner);
312 return emit_token_err(
313 &current,
314 cstr("malformed number: binary/hex numbers can't be signed"));
315 }
316 }
317 if (str_has_prefix(scanner->str, cstr("0b"))) {
318 scan_next(scanner);
319 scan_next(scanner);
320 while (scan_has_next(scanner)) {
321 c = scan_peek(scanner);
322 if (c == '0' || c == '1' || c == '_') {
323 scan_next(scanner);
324 continue;
325 }
326 if (is_valid_split(c)) {
327 return emit_token(current, scanner, TOK_NUMBER);
328 }
329 scan_skip_until_valid(scanner);
330 return emit_token_err(
331 &current, cstr("malformed number: invalid binary number"));
332 }
333 } else if (str_has_prefix(scanner->str, cstr("0x"))) {
334 scan_next(scanner);
335 scan_next(scanner);
336 while (scan_has_next(scanner)) {
337 c = scan_peek(scanner);
338 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
339 (c >= 'A' && c <= 'F') || c == '_') {
340 scan_next(scanner);
341 continue;
342 }
343 if (is_valid_split(c)) {
344 return emit_token(current, scanner, TOK_NUMBER);
345 }
346 scan_skip_until_valid(scanner);
347 return emit_token_err(&current,
348 cstr("malformed number: invalid hex number"));
349 }
350 } else {
351 // Integral.
352 while (scan_has_next(scanner)) {
353 c = scan_peek(scanner);
354 if (c == '.') {
355 scan_next(scanner);
356 break;
357 }
358 if ((c >= '0' && c <= '9') || c == '_') {
359 scan_next(scanner);
360 continue;
361 }
362 if (is_valid_split(c)) {
363 return emit_token(current, scanner, TOK_NUMBER);
364 }
365 scan_skip_until_valid(scanner);
366 return emit_token_err(&current, cstr("malformed number"));
367 }
368 c = scan_peek(scanner);
369 if (!(c >= '0' && c <= '9')) {
370 return emit_token_err(&current,
371 cstr("malformed number: no decimal digits"));
372 }
373 // Decimals.
374 while (scan_has_next(scanner)) {
375 c = scan_peek(scanner);
376 if (c == 'e' || c == 'E') {
377 scan_next(scanner);
378 break;
379 }
380 if ((c >= '0' && c <= '9') || c == '_') {
381 scan_next(scanner);
382 continue;
383 }
384 if (is_valid_split(c)) {
385 return emit_token(current, scanner, TOK_NUMBER);
386 }
387 scan_skip_until_valid(scanner);
388 return emit_token_err(&current, cstr("malformed number"));
389 }
390 // Exponent.
391 c = scan_peek(scanner);
392 if (c == '+' || c == '-') {
393 scan_next(scanner);
394 }
395 while (scan_has_next(scanner)) {
396 c = scan_peek(scanner);
397 if ((c >= '0' && c <= '9') || c == '_') {
398 scan_next(scanner);
399 continue;
400 }
401 if (c == '.') {
402 scan_next(scanner);
403 return emit_token_err(
404 &current,
405 cstr("malformed number: decimals not allowed on exponent"));
406 }
407 if (is_valid_split(c)) {
408 return emit_token(current, scanner, TOK_NUMBER);
409 }
410 scan_skip_until_valid(scanner);
411 return emit_token_err(&current, cstr("malformed number"));
412 }
413 }
414 return emit_token_err(&current, cstr("malformed number"));
415}
416
417Token
418scan_token(Scanner *scanner) {
419 assert(scanner);
420
421 scan_skip_whitespace(scanner);
422 if (!scan_has_next(scanner)) {
423 return emit_token(*scanner, scanner, TOK_EOF);
424 }
425
426 Scanner current = *scanner;
427 char c = scan_next(scanner);
428 switch (c) {
429 case '(':
430 return emit_token(current, scanner, TOK_LPAREN);
431 case ')':
432 return emit_token(current, scanner, TOK_RPAREN);
433 case '[':
434 return emit_token(current, scanner, TOK_LSQUARE);
435 case ']':
436 return emit_token(current, scanner, TOK_RSQUARE);
437 case '{':
438 return emit_token(current, scanner, TOK_LCURLY);
439 case '}':
440 return emit_token(current, scanner, TOK_RCURLY);
441 case '+': {
442 char p = scan_peek(scanner);
443 if (p >= '0' && p <= '9') {
444 *scanner = current;
445 return emit_token_number(scanner);
446 }
447 return emit_token(current, scanner, TOK_ADD);
448 };
449 case '-': {
450 char p = scan_peek(scanner);
451 if (p >= '0' && p <= '9') {
452 *scanner = current;
453 return emit_token_number(scanner);
454 }
455 return emit_token(current, scanner, TOK_ADD);
456 };
457 case '*':
458 return emit_token(current, scanner, TOK_MUL);
459 case '/':
460 return emit_token(current, scanner, TOK_DIV);
461 case '%':
462 return emit_token(current, scanner, TOK_MOD);
463 case '!': {
464 if (scan_peek(scanner) == '=') {
465 scan_next(scanner);
466 return emit_token(current, scanner, TOK_NOTEQ);
467 }
468 return emit_token(current, scanner, TOK_NOT);
469 };
470 case '=': {
471 if (scan_peek(scanner) == '=') {
472 scan_next(scanner);
473 return emit_token(current, scanner, TOK_EQ);
474 }
475 return emit_token(current, scanner, TOK_ASSIGN);
476 };
477 case '<': {
478 char p = scan_peek(scanner);
479 if (p == '=') {
480 scan_next(scanner);
481 return emit_token(current, scanner, TOK_LE);
482 }
483 if (p == '<') {
484 scan_next(scanner);
485 return emit_token(current, scanner, TOK_BITLSHIFT);
486 }
487 return emit_token(current, scanner, TOK_LT);
488 };
489 case '>': {
490 char p = scan_peek(scanner);
491 if (p == '=') {
492 scan_next(scanner);
493 return emit_token(current, scanner, TOK_GE);
494 }
495 if (p == '>') {
496 scan_next(scanner);
497 return emit_token(current, scanner, TOK_BITRSHIFT);
498 }
499 return emit_token(current, scanner, TOK_GT);
500 };
501 case '~':
502 return emit_token(current, scanner, TOK_BITNOT);
503 case '&': {
504 if (scan_peek(scanner) == '&') {
505 scan_next(scanner);
506 return emit_token(current, scanner, TOK_AND);
507 }
508 return emit_token(current, scanner, TOK_BITAND);
509 };
510 case '|': {
511 if (scan_peek(scanner) == '|') {
512 scan_next(scanner);
513 return emit_token(current, scanner, TOK_OR);
514 }
515 return emit_token(current, scanner, TOK_BITOR);
516 };
517 case ':':
518 return emit_token(current, scanner, TOK_COLON);
519 case '.':
520 return emit_token(current, scanner, TOK_DOT);
521 case '@':
522 return emit_token(current, scanner, TOK_AT);
523 case '"': {
524 while (scan_has_next(scanner)) {
525 c = scan_next(scanner);
526 if (c == '\\') {
527 scan_next(scanner);
528 continue;
529 }
530 if (c == '"') {
531 return emit_token(current, scanner, TOK_STRING);
532 }
533 }
534 return emit_token_err(&current, cstr("mismatched string quotes"));
535 };
536 }
537 if (c >= '0' && c <= '9') {
538 *scanner = current;
539 return emit_token_number(scanner);
540 }
541
542 scan_skip_until_valid(scanner);
543 Str val = current.str;
544 val.size = current.str.size - scanner->str.size;
545 val.size = val.size < 0 ? 0 : val.size;
546 if (val.size == 0) {
547 return emit_token_err(&current, cstr("unexpected character"));
548 }
549 switch (val.mem[0]) {
550 case 'b': {
551 if (str_has_prefix(val, cstr("break"))) {
552 return emit_token(current, scanner, TOK_BREAK);
553 }
554 } break;
555 case 'c': {
556 if (str_has_prefix(val, cstr("case"))) {
557 return emit_token(current, scanner, TOK_CASE);
558 }
559 if (str_has_prefix(val, cstr("continue"))) {
560 return emit_token(current, scanner, TOK_CONTINUE);
561 }
562 } break;
563 case 'f': {
564 if (str_has_prefix(val, cstr("false"))) {
565 return emit_token(current, scanner, TOK_FALSE);
566 }
567 if (str_has_prefix(val, cstr("fun"))) {
568 return emit_token(current, scanner, TOK_FUN);
569 }
570 } break;
571 case 'i': {
572 if (str_has_prefix(val, cstr("if"))) {
573 return emit_token(current, scanner, TOK_IF);
574 }
575 } break;
576 case 'l': {
577 if (str_has_prefix(val, cstr("let"))) {
578 return emit_token(current, scanner, TOK_LET);
579 }
580 } break;
581 case 'm': {
582 if (str_has_prefix(val, cstr("match"))) {
583 return emit_token(current, scanner, TOK_MATCH);
584 }
585 } break;
586 case 'n': {
587 if (str_has_prefix(val, cstr("nil"))) {
588 return emit_token(current, scanner, TOK_NIL);
589 }
590 } break;
591 case 'r': {
592 if (str_has_prefix(val, cstr("return"))) {
593 return emit_token(current, scanner, TOK_RETURN);
594 }
595 } break;
596 case 's': {
597 if (str_has_prefix(val, cstr("set"))) {
598 return emit_token(current, scanner, TOK_SET);
599 }
600 if (str_has_prefix(val, cstr("struct"))) {
601 return emit_token(current, scanner, TOK_STRUCT);
602 }
603 } break;
604 case 't': {
605 if (str_has_prefix(val, cstr("true"))) {
606 return emit_token(current, scanner, TOK_TRUE);
607 }
608 } break;
609 case 'w': {
610 if (str_has_prefix(val, cstr("while"))) {
611 return emit_token(current, scanner, TOK_WHILE);
612 }
613 } break;
614 }
615 return emit_token(current, scanner, TOK_SYMBOL);
616}
617
618void 23void
619process_file(Str path) { 24process_file(Str path) {
620 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); 25 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
@@ -628,36 +33,24 @@ process_file(Str path) {
628 33
629 Scanner scanner = { 34 Scanner scanner = {
630 .str = file.data, 35 .str = file.data,
631 .storage = &lexer_arena,
632 }; 36 };
633 Token tok = {0}; 37 Token tok = {0};
38 sz errors = 0;
634 while (tok.type != TOK_EOF) { 39 while (tok.type != TOK_EOF) {
635 tok = scan_token(&scanner); 40 tok = scan_token(&scanner);
636 eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type], 41 if (tok.type == TOK_UNKNOWN) {
637 tok.val); 42 eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col,
43 token_str[tok.type], tok.val);
44 errors++;
45 }
638 } 46 }
639 // while (true) {
640 // Token tok = scan_token(&scanner);
641 // println("%s:%d:%d:%s %s", path, tok.line, tok.col,
642 // token_str[tok.type],
643 // tok.val);
644 // if (tok.type == TOK_EOF) break;
645 // }
646 47
647 // Str scanner = file.data; 48 // Only proceed if there are no errors.
648 // // NOTE: Testing file read line by line. 49 if (errors) {
649 // for (sz i = 0; scanner.size != 0; i++) { 50 goto stop;
650 // Str line = str_split(&scanner, cstr("\n")); 51 }
651 // println("%x{4} %s", i + 1, line);
652 // }
653
654 // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345,
655 // cstr("BOOM!"), &logger_inf.storage);
656 52
657 // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); 53stop:
658 // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string
659 // logger"), 'X'); while (true) {}
660 // TODO: run lexer.
661 // Free up resources. 54 // Free up resources.
662 arena_destroy(&lexer_arena, os_allocator); 55 arena_destroy(&lexer_arena, os_allocator);
663} 56}