diff options
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- | src/lexer.c | 152 |
1 files changed, 115 insertions, 37 deletions
diff --git a/src/lexer.c b/src/lexer.c index 2feba2a..84b69e7 100644 --- a/src/lexer.c +++ b/src/lexer.c | |||
@@ -41,18 +41,24 @@ typedef enum TokenKind { | |||
41 | TOK_STRUCT, // struct | 41 | TOK_STRUCT, // struct |
42 | TOK_TRUE, // true | 42 | TOK_TRUE, // true |
43 | TOK_WHILE, // while | 43 | TOK_WHILE, // while |
44 | TOK_FOR, // for | ||
44 | 45 | ||
45 | // Arithmetic ops. | 46 | // Arithmetic ops. |
46 | TOK_ADD, // + | 47 | TOK_ADD, // + |
47 | TOK_SUB, // - | 48 | TOK_SUB, // - |
48 | TOK_MUL, // * | 49 | TOK_MUL, // * |
49 | TOK_DIV, // / | 50 | TOK_DIV, // / |
50 | TOK_MOD, // % | 51 | TOK_MOD, // % |
52 | TOK_ADD_ASSIGN, // += | ||
53 | TOK_SUB_ASSIGN, // -= | ||
54 | TOK_MUL_ASSIGN, // *= | ||
55 | TOK_DIV_ASSIGN, // /= | ||
56 | TOK_MOD_ASSIGN, // %= | ||
51 | 57 | ||
52 | // Logical ops. | 58 | // Logical ops. |
53 | TOK_NOT, // ! | 59 | TOK_NOT, // ! |
54 | TOK_AND, // && | 60 | TOK_AND, // and |
55 | TOK_OR, // || | 61 | TOK_OR, // or |
56 | TOK_EQ, // == | 62 | TOK_EQ, // == |
57 | TOK_NEQ, // != | 63 | TOK_NEQ, // != |
58 | TOK_LT, // < | 64 | TOK_LT, // < |
@@ -61,11 +67,17 @@ typedef enum TokenKind { | |||
61 | TOK_GE, // >= | 67 | TOK_GE, // >= |
62 | 68 | ||
63 | // Bitwise ops. | 69 | // Bitwise ops. |
64 | TOK_BITNOT, // ~ | 70 | TOK_BITNOT, // ~ |
65 | TOK_BITAND, // & | 71 | TOK_BITAND, // & |
66 | TOK_BITOR, // | | 72 | TOK_BITOR, // | |
67 | TOK_BITLSHIFT, // << | 73 | TOK_BITXOR, // ^ |
68 | TOK_BITRSHIFT, // >> | 74 | TOK_BITLSHIFT, // << |
75 | TOK_BITRSHIFT, // >> | ||
76 | TOK_BITAND_ASSIGN, // &= | ||
77 | TOK_BITOR_ASSIGN, // |= | ||
78 | TOK_BITXOR_ASSIGN, // ^= | ||
79 | TOK_BITLSHIFT_ASSIGN, // <<= | ||
80 | TOK_BITRSHIFT_ASSIGN, // >>= | ||
69 | 81 | ||
70 | // Special ops. | 82 | // Special ops. |
71 | TOK_COLON, // : | 83 | TOK_COLON, // : |
@@ -113,6 +125,7 @@ Str token_str[] = { | |||
113 | [TOK_STRUCT] = cstr("STRUCT"), | 125 | [TOK_STRUCT] = cstr("STRUCT"), |
114 | [TOK_TRUE] = cstr("TRUE"), | 126 | [TOK_TRUE] = cstr("TRUE"), |
115 | [TOK_WHILE] = cstr("WHILE"), | 127 | [TOK_WHILE] = cstr("WHILE"), |
128 | [TOK_FOR] = cstr("FOR"), | ||
116 | 129 | ||
117 | // Arithmetic ops. | 130 | // Arithmetic ops. |
118 | [TOK_ADD] = cstr("ADD"), | 131 | [TOK_ADD] = cstr("ADD"), |
@@ -120,6 +133,11 @@ Str token_str[] = { | |||
120 | [TOK_MUL] = cstr("MUL"), | 133 | [TOK_MUL] = cstr("MUL"), |
121 | [TOK_DIV] = cstr("DIV"), | 134 | [TOK_DIV] = cstr("DIV"), |
122 | [TOK_MOD] = cstr("MOD"), | 135 | [TOK_MOD] = cstr("MOD"), |
136 | [TOK_ADD_ASSIGN] = cstr("ADD_ASSIGN"), | ||
137 | [TOK_SUB_ASSIGN] = cstr("SUB_ASSIGN"), | ||
138 | [TOK_MUL_ASSIGN] = cstr("MUL_ASSIGN"), | ||
139 | [TOK_DIV_ASSIGN] = cstr("DIV_ASSIGN"), | ||
140 | [TOK_MOD_ASSIGN] = cstr("MOD_ASSIGN"), | ||
123 | 141 | ||
124 | // Logical ops. | 142 | // Logical ops. |
125 | [TOK_NOT] = cstr("NOT"), | 143 | [TOK_NOT] = cstr("NOT"), |
@@ -136,8 +154,14 @@ Str token_str[] = { | |||
136 | [TOK_BITNOT] = cstr("BITNOT"), | 154 | [TOK_BITNOT] = cstr("BITNOT"), |
137 | [TOK_BITAND] = cstr("BITAND"), | 155 | [TOK_BITAND] = cstr("BITAND"), |
138 | [TOK_BITOR] = cstr("BITOR"), | 156 | [TOK_BITOR] = cstr("BITOR"), |
157 | [TOK_BITXOR] = cstr("BITXOR"), | ||
139 | [TOK_BITLSHIFT] = cstr("BITLSHIFT"), | 158 | [TOK_BITLSHIFT] = cstr("BITLSHIFT"), |
140 | [TOK_BITRSHIFT] = cstr("BITRSHIFT"), | 159 | [TOK_BITRSHIFT] = cstr("BITRSHIFT"), |
160 | [TOK_BITAND_ASSIGN] = cstr("BITAND_ASSIGN"), | ||
161 | [TOK_BITOR_ASSIGN] = cstr("BITOR_ASSIGN"), | ||
162 | [TOK_BITXOR_ASSIGN] = cstr("BITXOR_ASSIGN"), | ||
163 | [TOK_BITLSHIFT_ASSIGN] = cstr("BITLSHIFT_ASSIGN"), | ||
164 | [TOK_BITRSHIFT_ASSIGN] = cstr("BITRSHIFT_ASSIGN"), | ||
141 | 165 | ||
142 | // Special ops. | 166 | // Special ops. |
143 | [TOK_COLON] = cstr("COLON"), | 167 | [TOK_COLON] = cstr("COLON"), |
@@ -432,6 +456,10 @@ scan_token(Scanner *scanner) { | |||
432 | *scanner = current; | 456 | *scanner = current; |
433 | return emit_token_number(scanner); | 457 | return emit_token_number(scanner); |
434 | } | 458 | } |
459 | if (p == '=') { | ||
460 | scan_next(scanner); | ||
461 | return emit_token(current, scanner, TOK_ADD_ASSIGN); | ||
462 | } | ||
435 | return emit_token(current, scanner, TOK_ADD); | 463 | return emit_token(current, scanner, TOK_ADD); |
436 | }; | 464 | }; |
437 | case '-': { | 465 | case '-': { |
@@ -440,11 +468,33 @@ scan_token(Scanner *scanner) { | |||
440 | *scanner = current; | 468 | *scanner = current; |
441 | return emit_token_number(scanner); | 469 | return emit_token_number(scanner); |
442 | } | 470 | } |
471 | if (p == '=') { | ||
472 | scan_next(scanner); | ||
473 | return emit_token(current, scanner, TOK_SUB_ASSIGN); | ||
474 | } | ||
443 | return emit_token(current, scanner, TOK_SUB); | 475 | return emit_token(current, scanner, TOK_SUB); |
444 | }; | 476 | }; |
445 | case '*': return emit_token(current, scanner, TOK_MUL); | 477 | case '*': { |
446 | case '/': return emit_token(current, scanner, TOK_DIV); | 478 | if (scan_peek(scanner) == '=') { |
447 | case '%': return emit_token(current, scanner, TOK_MOD); | 479 | scan_next(scanner); |
480 | return emit_token(current, scanner, TOK_MUL_ASSIGN); | ||
481 | } | ||
482 | return emit_token(current, scanner, TOK_MUL); | ||
483 | } | ||
484 | case '/': { | ||
485 | if (scan_peek(scanner) == '=') { | ||
486 | scan_next(scanner); | ||
487 | return emit_token(current, scanner, TOK_DIV_ASSIGN); | ||
488 | } | ||
489 | return emit_token(current, scanner, TOK_DIV); | ||
490 | } | ||
491 | case '%': { | ||
492 | if (scan_peek(scanner) == '=') { | ||
493 | scan_next(scanner); | ||
494 | return emit_token(current, scanner, TOK_MOD_ASSIGN); | ||
495 | } | ||
496 | return emit_token(current, scanner, TOK_MOD); | ||
497 | } | ||
448 | case '!': { | 498 | case '!': { |
449 | if (scan_peek(scanner) == '=') { | 499 | if (scan_peek(scanner) == '=') { |
450 | scan_next(scanner); | 500 | scan_next(scanner); |
@@ -467,6 +517,10 @@ scan_token(Scanner *scanner) { | |||
467 | } | 517 | } |
468 | if (p == '<') { | 518 | if (p == '<') { |
469 | scan_next(scanner); | 519 | scan_next(scanner); |
520 | if (scan_peek(scanner) == '=') { | ||
521 | scan_next(scanner); | ||
522 | return emit_token(current, scanner, TOK_BITLSHIFT_ASSIGN); | ||
523 | } | ||
470 | return emit_token(current, scanner, TOK_BITLSHIFT); | 524 | return emit_token(current, scanner, TOK_BITLSHIFT); |
471 | } | 525 | } |
472 | return emit_token(current, scanner, TOK_LT); | 526 | return emit_token(current, scanner, TOK_LT); |
@@ -479,22 +533,33 @@ scan_token(Scanner *scanner) { | |||
479 | } | 533 | } |
480 | if (p == '>') { | 534 | if (p == '>') { |
481 | scan_next(scanner); | 535 | scan_next(scanner); |
536 | if (scan_peek(scanner) == '=') { | ||
537 | scan_next(scanner); | ||
538 | return emit_token(current, scanner, TOK_BITRSHIFT_ASSIGN); | ||
539 | } | ||
482 | return emit_token(current, scanner, TOK_BITRSHIFT); | 540 | return emit_token(current, scanner, TOK_BITRSHIFT); |
483 | } | 541 | } |
484 | return emit_token(current, scanner, TOK_GT); | 542 | return emit_token(current, scanner, TOK_GT); |
485 | }; | 543 | }; |
486 | case '~': return emit_token(current, scanner, TOK_BITNOT); | 544 | case '~': return emit_token(current, scanner, TOK_BITNOT); |
545 | case '^': { | ||
546 | if (scan_peek(scanner) == '=') { | ||
547 | scan_next(scanner); | ||
548 | return emit_token(current, scanner, TOK_BITXOR_ASSIGN); | ||
549 | } | ||
550 | return emit_token(current, scanner, TOK_BITXOR); | ||
551 | }; | ||
487 | case '&': { | 552 | case '&': { |
488 | if (scan_peek(scanner) == '&') { | 553 | if (scan_peek(scanner) == '=') { |
489 | scan_next(scanner); | 554 | scan_next(scanner); |
490 | return emit_token(current, scanner, TOK_AND); | 555 | return emit_token(current, scanner, TOK_BITAND_ASSIGN); |
491 | } | 556 | } |
492 | return emit_token(current, scanner, TOK_BITAND); | 557 | return emit_token(current, scanner, TOK_BITAND); |
493 | }; | 558 | }; |
494 | case '|': { | 559 | case '|': { |
495 | if (scan_peek(scanner) == '|') { | 560 | if (scan_peek(scanner) == '=') { |
496 | scan_next(scanner); | 561 | scan_next(scanner); |
497 | return emit_token(current, scanner, TOK_OR); | 562 | return emit_token(current, scanner, TOK_BITOR_ASSIGN); |
498 | } | 563 | } |
499 | return emit_token(current, scanner, TOK_BITOR); | 564 | return emit_token(current, scanner, TOK_BITOR); |
500 | }; | 565 | }; |
@@ -535,78 +600,91 @@ scan_token(Scanner *scanner) { | |||
535 | return emit_token_err(&current, cstr("unexpected character")); | 600 | return emit_token_err(&current, cstr("unexpected character")); |
536 | } | 601 | } |
537 | switch (val.mem[0]) { | 602 | switch (val.mem[0]) { |
603 | case 'a': { | ||
604 | if (str_eq(val, cstr("and"))) { | ||
605 | return emit_token(current, scanner, TOK_AND); | ||
606 | } | ||
607 | } break; | ||
538 | case 'b': { | 608 | case 'b': { |
539 | if (str_has_prefix(val, cstr("break"))) { | 609 | if (str_eq(val, cstr("break"))) { |
540 | return emit_token(current, scanner, TOK_BREAK); | 610 | return emit_token(current, scanner, TOK_BREAK); |
541 | } | 611 | } |
542 | } break; | 612 | } break; |
543 | case 'c': { | 613 | case 'c': { |
544 | if (str_has_prefix(val, cstr("case"))) { | 614 | if (str_eq(val, cstr("case"))) { |
545 | return emit_token(current, scanner, TOK_CASE); | 615 | return emit_token(current, scanner, TOK_CASE); |
546 | } | 616 | } |
547 | if (str_has_prefix(val, cstr("continue"))) { | 617 | if (str_eq(val, cstr("continue"))) { |
548 | return emit_token(current, scanner, TOK_CONTINUE); | 618 | return emit_token(current, scanner, TOK_CONTINUE); |
549 | } | 619 | } |
550 | if (str_has_prefix(val, cstr("cond"))) { | 620 | if (str_eq(val, cstr("cond"))) { |
551 | return emit_token(current, scanner, TOK_COND); | 621 | return emit_token(current, scanner, TOK_COND); |
552 | } | 622 | } |
553 | } break; | 623 | } break; |
554 | case 'e': { | 624 | case 'e': { |
555 | if (str_has_prefix(val, cstr("else"))) { | 625 | if (str_eq(val, cstr("else"))) { |
556 | return emit_token(current, scanner, TOK_ELSE); | 626 | return emit_token(current, scanner, TOK_ELSE); |
557 | } | 627 | } |
558 | if (str_has_prefix(val, cstr("enum"))) { | 628 | if (str_eq(val, cstr("enum"))) { |
559 | return emit_token(current, scanner, TOK_ENUM); | 629 | return emit_token(current, scanner, TOK_ENUM); |
560 | } | 630 | } |
561 | } break; | 631 | } break; |
562 | case 'f': { | 632 | case 'f': { |
563 | if (str_has_prefix(val, cstr("false"))) { | 633 | if (str_eq(val, cstr("false"))) { |
564 | return emit_token(current, scanner, TOK_FALSE); | 634 | return emit_token(current, scanner, TOK_FALSE); |
565 | } | 635 | } |
566 | if (str_has_prefix(val, cstr("fun"))) { | 636 | if (str_eq(val, cstr("fun"))) { |
567 | return emit_token(current, scanner, TOK_FUN); | 637 | return emit_token(current, scanner, TOK_FUN); |
568 | } | 638 | } |
639 | if (str_eq(val, cstr("for"))) { | ||
640 | return emit_token(current, scanner, TOK_FOR); | ||
641 | } | ||
569 | } break; | 642 | } break; |
570 | case 'i': { | 643 | case 'i': { |
571 | if (str_has_prefix(val, cstr("if"))) { | 644 | if (str_eq(val, cstr("if"))) { |
572 | return emit_token(current, scanner, TOK_IF); | 645 | return emit_token(current, scanner, TOK_IF); |
573 | } | 646 | } |
574 | } break; | 647 | } break; |
575 | case 'l': { | 648 | case 'l': { |
576 | if (str_has_prefix(val, cstr("let"))) { | 649 | if (str_eq(val, cstr("let"))) { |
577 | return emit_token(current, scanner, TOK_LET); | 650 | return emit_token(current, scanner, TOK_LET); |
578 | } | 651 | } |
579 | } break; | 652 | } break; |
580 | case 'm': { | 653 | case 'm': { |
581 | if (str_has_prefix(val, cstr("match"))) { | 654 | if (str_eq(val, cstr("match"))) { |
582 | return emit_token(current, scanner, TOK_MATCH); | 655 | return emit_token(current, scanner, TOK_MATCH); |
583 | } | 656 | } |
584 | } break; | 657 | } break; |
585 | case 'n': { | 658 | case 'n': { |
586 | if (str_has_prefix(val, cstr("nil"))) { | 659 | if (str_eq(val, cstr("nil"))) { |
587 | return emit_token(current, scanner, TOK_NIL); | 660 | return emit_token(current, scanner, TOK_NIL); |
588 | } | 661 | } |
589 | } break; | 662 | } break; |
590 | case 'r': { | 663 | case 'r': { |
591 | if (str_has_prefix(val, cstr("return"))) { | 664 | if (str_eq(val, cstr("return"))) { |
592 | return emit_token(current, scanner, TOK_RETURN); | 665 | return emit_token(current, scanner, TOK_RETURN); |
593 | } | 666 | } |
594 | } break; | 667 | } break; |
595 | case 's': { | 668 | case 's': { |
596 | if (str_has_prefix(val, cstr("set"))) { | 669 | if (str_eq(val, cstr("set"))) { |
597 | return emit_token(current, scanner, TOK_SET); | 670 | return emit_token(current, scanner, TOK_SET); |
598 | } | 671 | } |
599 | if (str_has_prefix(val, cstr("struct"))) { | 672 | if (str_eq(val, cstr("struct"))) { |
600 | return emit_token(current, scanner, TOK_STRUCT); | 673 | return emit_token(current, scanner, TOK_STRUCT); |
601 | } | 674 | } |
602 | } break; | 675 | } break; |
603 | case 't': { | 676 | case 't': { |
604 | if (str_has_prefix(val, cstr("true"))) { | 677 | if (str_eq(val, cstr("true"))) { |
605 | return emit_token(current, scanner, TOK_TRUE); | 678 | return emit_token(current, scanner, TOK_TRUE); |
606 | } | 679 | } |
607 | } break; | 680 | } break; |
681 | case 'o': { | ||
682 | if (str_eq(val, cstr("or"))) { | ||
683 | return emit_token(current, scanner, TOK_OR); | ||
684 | } | ||
685 | } break; | ||
608 | case 'w': { | 686 | case 'w': { |
609 | if (str_has_prefix(val, cstr("while"))) { | 687 | if (str_eq(val, cstr("while"))) { |
610 | return emit_token(current, scanner, TOK_WHILE); | 688 | return emit_token(current, scanner, TOK_WHILE); |
611 | } | 689 | } |
612 | } break; | 690 | } break; |
@@ -628,4 +706,4 @@ print_tokens(Str path, Token *tokens) { | |||
628 | } | 706 | } |
629 | } | 707 | } |
630 | 708 | ||
631 | #endif // LEXER_C | 709 | #endif // LEXER_C |