diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-15 16:10:16 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-15 16:10:16 +0200 |
commit | 893b52223d274c675272cee55768a9d5853420fb (patch) | |
tree | e9b57ec842cf622c7b50ec7f1cfb29914e7a7251 | |
parent | 99b92b160af5b9475262676aef7dd7f209429298 (diff) | |
download | bdl-893b52223d274c675272cee55768a9d5853420fb.tar.gz bdl-893b52223d274c675272cee55768a9d5853420fb.zip |
Finish basic lexing
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | src/main.c | 163 | ||||
-rw-r--r-- | tests/literals.bad | 1 |
3 files changed, 108 insertions, 58 deletions
@@ -47,7 +47,7 @@ tests: $(BIN) | |||
47 | ./$(BIN) tests/constants/numbers.bdl | 47 | ./$(BIN) tests/constants/numbers.bdl |
48 | 48 | ||
49 | run: $(BIN) | 49 | run: $(BIN) |
50 | $(BIN) tests/simple.bad | 50 | $(BIN) tests/literals.bad |
51 | 51 | ||
52 | viz_lex: $(BIN) | 52 | viz_lex: $(BIN) |
53 | $(BIN) -pl example.bdl | 53 | $(BIN) -pl example.bdl |
@@ -25,12 +25,12 @@ typedef enum TokenType { | |||
25 | TOK_UNKNOWN = 0, | 25 | TOK_UNKNOWN = 0, |
26 | 26 | ||
27 | // Parentheses. | 27 | // Parentheses. |
28 | TOK_LPAREN, | 28 | TOK_LPAREN, // ( |
29 | TOK_RPAREN, | 29 | TOK_RPAREN, // ) |
30 | TOK_LSQUARE, | 30 | TOK_LSQUARE, // [ |
31 | TOK_RSQUARE, | 31 | TOK_RSQUARE, // ] |
32 | TOK_LCURLY, | 32 | TOK_LCURLY, // { |
33 | TOK_RCURLY, | 33 | TOK_RCURLY, // } |
34 | 34 | ||
35 | // Basic literals. | 35 | // Basic literals. |
36 | TOK_NUMBER, | 36 | TOK_NUMBER, |
@@ -38,20 +38,20 @@ typedef enum TokenType { | |||
38 | TOK_STRING, | 38 | TOK_STRING, |
39 | 39 | ||
40 | // Keywords. | 40 | // Keywords. |
41 | TOK_LET, | 41 | TOK_BREAK, // break |
42 | TOK_SET, | 42 | TOK_CASE, // case |
43 | TOK_FUN, | 43 | TOK_CONTINUE, // continue |
44 | TOK_STRUCT, | 44 | TOK_FALSE, // false |
45 | TOK_IF, | 45 | TOK_FUN, // fun |
46 | TOK_MATCH, | 46 | TOK_IF, // if |
47 | TOK_CASE, | 47 | TOK_LET, // let |
48 | TOK_WHILE, | 48 | TOK_MATCH, // match |
49 | TOK_CONTINUE, | 49 | TOK_NIL, // nil |
50 | TOK_BREAK, | 50 | TOK_RETURN, // return |
51 | TOK_RETURN, | 51 | TOK_SET, // set |
52 | TOK_NIL, | 52 | TOK_STRUCT, // struct |
53 | TOK_TRUE, | 53 | TOK_TRUE, // true |
54 | TOK_FALSE, | 54 | TOK_WHILE, // while |
55 | 55 | ||
56 | // Arithmetic ops. | 56 | // Arithmetic ops. |
57 | TOK_ADD, // + | 57 | TOK_ADD, // + |
@@ -105,20 +105,20 @@ Str token_str[] = { | |||
105 | [TOK_STRING] = cstr("STRING"), | 105 | [TOK_STRING] = cstr("STRING"), |
106 | 106 | ||
107 | // Keywords. | 107 | // Keywords. |
108 | [TOK_LET] = cstr("LET"), | 108 | [TOK_BREAK] = cstr("BREAK"), |
109 | [TOK_SET] = cstr("SET"), | 109 | [TOK_CASE] = cstr("CASE"), |
110 | [TOK_CONTINUE] = cstr("CONTINUE"), | ||
111 | [TOK_FALSE] = cstr("FALSE"), | ||
110 | [TOK_FUN] = cstr("FUN"), | 112 | [TOK_FUN] = cstr("FUN"), |
111 | [TOK_STRUCT] = cstr("STRUCT"), | ||
112 | [TOK_IF] = cstr("IF"), | 113 | [TOK_IF] = cstr("IF"), |
114 | [TOK_LET] = cstr("LET"), | ||
113 | [TOK_MATCH] = cstr("MATCH"), | 115 | [TOK_MATCH] = cstr("MATCH"), |
114 | [TOK_CASE] = cstr("CASE"), | ||
115 | [TOK_WHILE] = cstr("WHILE"), | ||
116 | [TOK_CONTINUE] = cstr("CONTINUE"), | ||
117 | [TOK_BREAK] = cstr("BREAK"), | ||
118 | [TOK_RETURN] = cstr("RETURN"), | ||
119 | [TOK_NIL] = cstr("NIL"), | 116 | [TOK_NIL] = cstr("NIL"), |
117 | [TOK_RETURN] = cstr("RETURN"), | ||
118 | [TOK_SET] = cstr("SET"), | ||
119 | [TOK_STRUCT] = cstr("STRUCT"), | ||
120 | [TOK_TRUE] = cstr("TRUE"), | 120 | [TOK_TRUE] = cstr("TRUE"), |
121 | [TOK_FALSE] = cstr("FALSE"), | 121 | [TOK_WHILE] = cstr("WHILE"), |
122 | 122 | ||
123 | // Arithmetic ops. | 123 | // Arithmetic ops. |
124 | [TOK_ADD] = cstr("ADD"), | 124 | [TOK_ADD] = cstr("ADD"), |
@@ -319,7 +319,7 @@ emit_token_number(Scanner *scanner) { | |||
319 | scan_next(scanner); | 319 | scan_next(scanner); |
320 | while (scan_has_next(scanner)) { | 320 | while (scan_has_next(scanner)) { |
321 | c = scan_peek(scanner); | 321 | c = scan_peek(scanner); |
322 | if (c == '0' || c == '1') { | 322 | if (c == '0' || c == '1' || c == '_') { |
323 | scan_next(scanner); | 323 | scan_next(scanner); |
324 | continue; | 324 | continue; |
325 | } | 325 | } |
@@ -336,7 +336,7 @@ emit_token_number(Scanner *scanner) { | |||
336 | while (scan_has_next(scanner)) { | 336 | while (scan_has_next(scanner)) { |
337 | c = scan_peek(scanner); | 337 | c = scan_peek(scanner); |
338 | if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || | 338 | if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || |
339 | (c >= 'A' && c <= 'F')) { | 339 | (c >= 'A' && c <= 'F') || c == '_') { |
340 | scan_next(scanner); | 340 | scan_next(scanner); |
341 | continue; | 341 | continue; |
342 | } | 342 | } |
@@ -355,7 +355,7 @@ emit_token_number(Scanner *scanner) { | |||
355 | scan_next(scanner); | 355 | scan_next(scanner); |
356 | break; | 356 | break; |
357 | } | 357 | } |
358 | if (c >= '0' && c <= '9') { | 358 | if ((c >= '0' && c <= '9') || c == '_') { |
359 | scan_next(scanner); | 359 | scan_next(scanner); |
360 | continue; | 360 | continue; |
361 | } | 361 | } |
@@ -377,7 +377,7 @@ emit_token_number(Scanner *scanner) { | |||
377 | scan_next(scanner); | 377 | scan_next(scanner); |
378 | break; | 378 | break; |
379 | } | 379 | } |
380 | if (c >= '0' && c <= '9') { | 380 | if ((c >= '0' && c <= '9') || c == '_') { |
381 | scan_next(scanner); | 381 | scan_next(scanner); |
382 | continue; | 382 | continue; |
383 | } | 383 | } |
@@ -394,7 +394,7 @@ emit_token_number(Scanner *scanner) { | |||
394 | } | 394 | } |
395 | while (scan_has_next(scanner)) { | 395 | while (scan_has_next(scanner)) { |
396 | c = scan_peek(scanner); | 396 | c = scan_peek(scanner); |
397 | if (c >= '0' && c <= '9') { | 397 | if ((c >= '0' && c <= '9') || c == '_') { |
398 | scan_next(scanner); | 398 | scan_next(scanner); |
399 | continue; | 399 | continue; |
400 | } | 400 | } |
@@ -539,29 +539,80 @@ scan_token(Scanner *scanner) { | |||
539 | return emit_token_number(scanner); | 539 | return emit_token_number(scanner); |
540 | } | 540 | } |
541 | 541 | ||
542 | // TODO: keywords & literals | ||
543 | // Basic literals. | ||
544 | // TOK_SYMBOL, | ||
545 | |||
546 | // // Keywords. | ||
547 | // TOK_LET, | ||
548 | // TOK_SET, | ||
549 | // TOK_FUN, | ||
550 | // TOK_STRUCT, | ||
551 | // TOK_IF, | ||
552 | // TOK_MATCH, | ||
553 | // TOK_CASE, | ||
554 | // TOK_WHILE, | ||
555 | // TOK_CONTINUE, | ||
556 | // TOK_BREAK, | ||
557 | // TOK_RETURN, | ||
558 | // TOK_NIL, | ||
559 | // TOK_TRUE, | ||
560 | // TOK_FALSE, | ||
561 | |||
562 | // At this point we have an error, skip until we find whitespace again. | ||
563 | scan_skip_until_valid(scanner); | 542 | scan_skip_until_valid(scanner); |
564 | return emit_token_err(&current, cstr("unexpected character")); | 543 | Str val = current.str; |
544 | val.size = current.str.size - scanner->str.size; | ||
545 | val.size = val.size < 0 ? 0 : val.size; | ||
546 | if (val.size == 0) { | ||
547 | return emit_token_err(&current, cstr("unexpected character")); | ||
548 | } | ||
549 | switch (val.mem[0]) { | ||
550 | case 'b': { | ||
551 | if (str_has_prefix(val, cstr("break"))) { | ||
552 | return emit_token(current, scanner, TOK_BREAK); | ||
553 | } | ||
554 | } break; | ||
555 | case 'c': { | ||
556 | if (str_has_prefix(val, cstr("case"))) { | ||
557 | return emit_token(current, scanner, TOK_CASE); | ||
558 | } | ||
559 | if (str_has_prefix(val, cstr("continue"))) { | ||
560 | return emit_token(current, scanner, TOK_CONTINUE); | ||
561 | } | ||
562 | } break; | ||
563 | case 'f': { | ||
564 | if (str_has_prefix(val, cstr("false"))) { | ||
565 | return emit_token(current, scanner, TOK_FALSE); | ||
566 | } | ||
567 | if (str_has_prefix(val, cstr("fun"))) { | ||
568 | return emit_token(current, scanner, TOK_FUN); | ||
569 | } | ||
570 | } break; | ||
571 | case 'i': { | ||
572 | if (str_has_prefix(val, cstr("if"))) { | ||
573 | return emit_token(current, scanner, TOK_IF); | ||
574 | } | ||
575 | } break; | ||
576 | case 'l': { | ||
577 | if (str_has_prefix(val, cstr("let"))) { | ||
578 | return emit_token(current, scanner, TOK_LET); | ||
579 | } | ||
580 | } break; | ||
581 | case 'm': { | ||
582 | if (str_has_prefix(val, cstr("match"))) { | ||
583 | return emit_token(current, scanner, TOK_MATCH); | ||
584 | } | ||
585 | } break; | ||
586 | case 'n': { | ||
587 | if (str_has_prefix(val, cstr("nil"))) { | ||
588 | return emit_token(current, scanner, TOK_NIL); | ||
589 | } | ||
590 | } break; | ||
591 | case 'r': { | ||
592 | if (str_has_prefix(val, cstr("return"))) { | ||
593 | return emit_token(current, scanner, TOK_RETURN); | ||
594 | } | ||
595 | } break; | ||
596 | case 's': { | ||
597 | if (str_has_prefix(val, cstr("set"))) { | ||
598 | return emit_token(current, scanner, TOK_SET); | ||
599 | } | ||
600 | if (str_has_prefix(val, cstr("struct"))) { | ||
601 | return emit_token(current, scanner, TOK_STRUCT); | ||
602 | } | ||
603 | } break; | ||
604 | case 't': { | ||
605 | if (str_has_prefix(val, cstr("true"))) { | ||
606 | return emit_token(current, scanner, TOK_TRUE); | ||
607 | } | ||
608 | } break; | ||
609 | case 'w': { | ||
610 | if (str_has_prefix(val, cstr("while"))) { | ||
611 | return emit_token(current, scanner, TOK_WHILE); | ||
612 | } | ||
613 | } break; | ||
614 | } | ||
615 | return emit_token(current, scanner, TOK_SYMBOL); | ||
565 | } | 616 | } |
566 | 617 | ||
567 | void | 618 | void |
diff --git a/tests/literals.bad b/tests/literals.bad index 673494d..e958bba 100644 --- a/tests/literals.bad +++ b/tests/literals.bad | |||
@@ -23,7 +23,6 @@ | |||
23 | 2.45e5 ; we can use scientific notation | 23 | 2.45e5 ; we can use scientific notation |
24 | +1.23e+6 ; +/- can be on the number and/or the exponent | 24 | +1.23e+6 ; +/- can be on the number and/or the exponent |
25 | -3.14e-1 ; the exponents are always integers | 25 | -3.14e-1 ; the exponents are always integers |
26 | -3.21e+0xff ; ... in any of its forms | ||
27 | 26 | ||
28 | ; Booleans. | 27 | ; Booleans. |
29 | true | 28 | true |