about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2024-06-15 16:10:16 +0200
committer Bad Diode <bd@badd10de.dev> 2024-06-15 16:10:16 +0200
commit 893b52223d274c675272cee55768a9d5853420fb (patch)
tree e9b57ec842cf622c7b50ec7f1cfb29914e7a7251
parent 99b92b160af5b9475262676aef7dd7f209429298 (diff)
download bdl-893b52223d274c675272cee55768a9d5853420fb.tar.gz
bdl-893b52223d274c675272cee55768a9d5853420fb.zip
Finish basic lexing
-rw-r--r-- Makefile 2
-rw-r--r-- src/main.c 163
-rw-r--r-- tests/literals.bad 1
3 files changed, 108 insertions, 58 deletions
diff --git a/Makefile b/Makefile
index 1d894dd..45c1389 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ tests: $(BIN)
47 ./$(BIN) tests/constants/numbers.bdl 47 ./$(BIN) tests/constants/numbers.bdl
48 48
49run: $(BIN) 49run: $(BIN)
50 $(BIN) tests/simple.bad 50 $(BIN) tests/literals.bad
51 51
52viz_lex: $(BIN) 52viz_lex: $(BIN)
53 $(BIN) -pl example.bdl 53 $(BIN) -pl example.bdl
diff --git a/src/main.c b/src/main.c
index 9246092..edd70aa 100644
--- a/src/main.c
+++ b/src/main.c
@@ -25,12 +25,12 @@ typedef enum TokenType {
25 TOK_UNKNOWN = 0, 25 TOK_UNKNOWN = 0,
26 26
27 // Parentheses. 27 // Parentheses.
28 TOK_LPAREN, 28 TOK_LPAREN, // (
29 TOK_RPAREN, 29 TOK_RPAREN, // )
30 TOK_LSQUARE, 30 TOK_LSQUARE, // [
31 TOK_RSQUARE, 31 TOK_RSQUARE, // ]
32 TOK_LCURLY, 32 TOK_LCURLY, // {
33 TOK_RCURLY, 33 TOK_RCURLY, // }
34 34
35 // Basic literals. 35 // Basic literals.
36 TOK_NUMBER, 36 TOK_NUMBER,
@@ -38,20 +38,20 @@ typedef enum TokenType {
38 TOK_STRING, 38 TOK_STRING,
39 39
40 // Keywords. 40 // Keywords.
41 TOK_LET, 41 TOK_BREAK, // break
42 TOK_SET, 42 TOK_CASE, // case
43 TOK_FUN, 43 TOK_CONTINUE, // continue
44 TOK_STRUCT, 44 TOK_FALSE, // false
45 TOK_IF, 45 TOK_FUN, // fun
46 TOK_MATCH, 46 TOK_IF, // if
47 TOK_CASE, 47 TOK_LET, // let
48 TOK_WHILE, 48 TOK_MATCH, // match
49 TOK_CONTINUE, 49 TOK_NIL, // nil
50 TOK_BREAK, 50 TOK_RETURN, // return
51 TOK_RETURN, 51 TOK_SET, // set
52 TOK_NIL, 52 TOK_STRUCT, // struct
53 TOK_TRUE, 53 TOK_TRUE, // true
54 TOK_FALSE, 54 TOK_WHILE, // while
55 55
56 // Arithmetic ops. 56 // Arithmetic ops.
57 TOK_ADD, // + 57 TOK_ADD, // +
@@ -105,20 +105,20 @@ Str token_str[] = {
105 [TOK_STRING] = cstr("STRING"), 105 [TOK_STRING] = cstr("STRING"),
106 106
107 // Keywords. 107 // Keywords.
108 [TOK_LET] = cstr("LET"), 108 [TOK_BREAK] = cstr("BREAK"),
109 [TOK_SET] = cstr("SET"), 109 [TOK_CASE] = cstr("CASE"),
110 [TOK_CONTINUE] = cstr("CONTINUE"),
111 [TOK_FALSE] = cstr("FALSE"),
110 [TOK_FUN] = cstr("FUN"), 112 [TOK_FUN] = cstr("FUN"),
111 [TOK_STRUCT] = cstr("STRUCT"),
112 [TOK_IF] = cstr("IF"), 113 [TOK_IF] = cstr("IF"),
114 [TOK_LET] = cstr("LET"),
113 [TOK_MATCH] = cstr("MATCH"), 115 [TOK_MATCH] = cstr("MATCH"),
114 [TOK_CASE] = cstr("CASE"),
115 [TOK_WHILE] = cstr("WHILE"),
116 [TOK_CONTINUE] = cstr("CONTINUE"),
117 [TOK_BREAK] = cstr("BREAK"),
118 [TOK_RETURN] = cstr("RETURN"),
119 [TOK_NIL] = cstr("NIL"), 116 [TOK_NIL] = cstr("NIL"),
117 [TOK_RETURN] = cstr("RETURN"),
118 [TOK_SET] = cstr("SET"),
119 [TOK_STRUCT] = cstr("STRUCT"),
120 [TOK_TRUE] = cstr("TRUE"), 120 [TOK_TRUE] = cstr("TRUE"),
121 [TOK_FALSE] = cstr("FALSE"), 121 [TOK_WHILE] = cstr("WHILE"),
122 122
123 // Arithmetic ops. 123 // Arithmetic ops.
124 [TOK_ADD] = cstr("ADD"), 124 [TOK_ADD] = cstr("ADD"),
@@ -319,7 +319,7 @@ emit_token_number(Scanner *scanner) {
319 scan_next(scanner); 319 scan_next(scanner);
320 while (scan_has_next(scanner)) { 320 while (scan_has_next(scanner)) {
321 c = scan_peek(scanner); 321 c = scan_peek(scanner);
322 if (c == '0' || c == '1') { 322 if (c == '0' || c == '1' || c == '_') {
323 scan_next(scanner); 323 scan_next(scanner);
324 continue; 324 continue;
325 } 325 }
@@ -336,7 +336,7 @@ emit_token_number(Scanner *scanner) {
336 while (scan_has_next(scanner)) { 336 while (scan_has_next(scanner)) {
337 c = scan_peek(scanner); 337 c = scan_peek(scanner);
338 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || 338 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
339 (c >= 'A' && c <= 'F')) { 339 (c >= 'A' && c <= 'F') || c == '_') {
340 scan_next(scanner); 340 scan_next(scanner);
341 continue; 341 continue;
342 } 342 }
@@ -355,7 +355,7 @@ emit_token_number(Scanner *scanner) {
355 scan_next(scanner); 355 scan_next(scanner);
356 break; 356 break;
357 } 357 }
358 if (c >= '0' && c <= '9') { 358 if ((c >= '0' && c <= '9') || c == '_') {
359 scan_next(scanner); 359 scan_next(scanner);
360 continue; 360 continue;
361 } 361 }
@@ -377,7 +377,7 @@ emit_token_number(Scanner *scanner) {
377 scan_next(scanner); 377 scan_next(scanner);
378 break; 378 break;
379 } 379 }
380 if (c >= '0' && c <= '9') { 380 if ((c >= '0' && c <= '9') || c == '_') {
381 scan_next(scanner); 381 scan_next(scanner);
382 continue; 382 continue;
383 } 383 }
@@ -394,7 +394,7 @@ emit_token_number(Scanner *scanner) {
394 } 394 }
395 while (scan_has_next(scanner)) { 395 while (scan_has_next(scanner)) {
396 c = scan_peek(scanner); 396 c = scan_peek(scanner);
397 if (c >= '0' && c <= '9') { 397 if ((c >= '0' && c <= '9') || c == '_') {
398 scan_next(scanner); 398 scan_next(scanner);
399 continue; 399 continue;
400 } 400 }
@@ -539,29 +539,80 @@ scan_token(Scanner *scanner) {
539 return emit_token_number(scanner); 539 return emit_token_number(scanner);
540 } 540 }
541 541
542 // TODO: keywords & literals
543 // Basic literals.
544 // TOK_SYMBOL,
545
546 // // Keywords.
547 // TOK_LET,
548 // TOK_SET,
549 // TOK_FUN,
550 // TOK_STRUCT,
551 // TOK_IF,
552 // TOK_MATCH,
553 // TOK_CASE,
554 // TOK_WHILE,
555 // TOK_CONTINUE,
556 // TOK_BREAK,
557 // TOK_RETURN,
558 // TOK_NIL,
559 // TOK_TRUE,
560 // TOK_FALSE,
561
562 // At this point we have an error, skip until we find whitespace again.
563 scan_skip_until_valid(scanner); 542 scan_skip_until_valid(scanner);
564 return emit_token_err(&current, cstr("unexpected character")); 543 Str val = current.str;
544 val.size = current.str.size - scanner->str.size;
545 val.size = val.size < 0 ? 0 : val.size;
546 if (val.size == 0) {
547 return emit_token_err(&current, cstr("unexpected character"));
548 }
549 switch (val.mem[0]) {
550 case 'b': {
551 if (str_has_prefix(val, cstr("break"))) {
552 return emit_token(current, scanner, TOK_BREAK);
553 }
554 } break;
555 case 'c': {
556 if (str_has_prefix(val, cstr("case"))) {
557 return emit_token(current, scanner, TOK_CASE);
558 }
559 if (str_has_prefix(val, cstr("continue"))) {
560 return emit_token(current, scanner, TOK_CONTINUE);
561 }
562 } break;
563 case 'f': {
564 if (str_has_prefix(val, cstr("false"))) {
565 return emit_token(current, scanner, TOK_FALSE);
566 }
567 if (str_has_prefix(val, cstr("fun"))) {
568 return emit_token(current, scanner, TOK_FUN);
569 }
570 } break;
571 case 'i': {
572 if (str_has_prefix(val, cstr("if"))) {
573 return emit_token(current, scanner, TOK_IF);
574 }
575 } break;
576 case 'l': {
577 if (str_has_prefix(val, cstr("let"))) {
578 return emit_token(current, scanner, TOK_LET);
579 }
580 } break;
581 case 'm': {
582 if (str_has_prefix(val, cstr("match"))) {
583 return emit_token(current, scanner, TOK_MATCH);
584 }
585 } break;
586 case 'n': {
587 if (str_has_prefix(val, cstr("nil"))) {
588 return emit_token(current, scanner, TOK_NIL);
589 }
590 } break;
591 case 'r': {
592 if (str_has_prefix(val, cstr("return"))) {
593 return emit_token(current, scanner, TOK_RETURN);
594 }
595 } break;
596 case 's': {
597 if (str_has_prefix(val, cstr("set"))) {
598 return emit_token(current, scanner, TOK_SET);
599 }
600 if (str_has_prefix(val, cstr("struct"))) {
601 return emit_token(current, scanner, TOK_STRUCT);
602 }
603 } break;
604 case 't': {
605 if (str_has_prefix(val, cstr("true"))) {
606 return emit_token(current, scanner, TOK_TRUE);
607 }
608 } break;
609 case 'w': {
610 if (str_has_prefix(val, cstr("while"))) {
611 return emit_token(current, scanner, TOK_WHILE);
612 }
613 } break;
614 }
615 return emit_token(current, scanner, TOK_SYMBOL);
565} 616}
566 617
567void 618void
diff --git a/tests/literals.bad b/tests/literals.bad
index 673494d..e958bba 100644
--- a/tests/literals.bad
+++ b/tests/literals.bad
@@ -23,7 +23,6 @@
232.45e5 ; we can use scientific notation 232.45e5 ; we can use scientific notation
24+1.23e+6 ; +/- can be on the number and/or the exponent 24+1.23e+6 ; +/- can be on the number and/or the exponent
25-3.14e-1 ; the exponents are always integers 25-3.14e-1 ; the exponents are always integers
26-3.21e+0xff ; ... in any of its forms
27 26
28; Booleans. 27; Booleans.
29true 28true