Finish basic lexing

author: Bad Diode <bd@badd10de.dev> 2024-06-15 16:10:16 +0200
committer: Bad Diode <bd@badd10de.dev> 2024-06-15 16:10:16 +0200
commit: 893b52223d274c675272cee55768a9d5853420fb (patch)
tree: e9b57ec842cf622c7b50ec7f1cfb29914e7a7251
parent: 99b92b160af5b9475262676aef7dd7f209429298 (diff)
download: bdl-893b52223d274c675272cee55768a9d5853420fb.tar.gz bdl-893b52223d274c675272cee55768a9d5853420fb.zip
3 files changed, 108 insertions, 58 deletions
diff --git a/Makefile b/Makefile
index 1d894dd..45c1389 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ tests: $(BIN)
        ./$(BIN) tests/constants/numbers.bdl
 run: $(BIN)
-        $(BIN) tests/simple.bad
+        $(BIN) tests/literals.bad
 viz_lex: $(BIN)
        $(BIN) -pl example.bdl
diff --git a/src/main.c b/src/main.c
index 9246092..edd70aa 100644
--- a/src/main.c
+++ b/src/main.c
@@ -25,12 +25,12 @@ typedef enum TokenType {
    TOK_UNKNOWN = 0,
    // Parentheses.
-    TOK_LPAREN,
+    TOK_LPAREN,   // (
-    TOK_RPAREN,
+    TOK_RPAREN,   // )
-    TOK_LSQUARE,
+    TOK_LSQUARE,  // [
-    TOK_RSQUARE,
+    TOK_RSQUARE,  // ]
-    TOK_LCURLY,
+    TOK_LCURLY,   // {
-    TOK_RCURLY,
+    TOK_RCURLY,   // }
    // Basic literals.
    TOK_NUMBER,
@@ -38,20 +38,20 @@ typedef enum TokenType {
    TOK_STRING,
    // Keywords.
-    TOK_LET,
+    TOK_BREAK,     // break
-    TOK_SET,
+    TOK_CASE,      // case
-    TOK_FUN,
+    TOK_CONTINUE,  // continue
-    TOK_STRUCT,
+    TOK_FALSE,     // false
-    TOK_IF,
+    TOK_FUN,       // fun
-    TOK_MATCH,
+    TOK_IF,        // if
-    TOK_CASE,
+    TOK_LET,       // let
-    TOK_WHILE,
+    TOK_MATCH,     // match
-    TOK_CONTINUE,
+    TOK_NIL,       // nil
-    TOK_BREAK,
+    TOK_RETURN,    // return
-    TOK_RETURN,
+    TOK_SET,       // set
-    TOK_NIL,
+    TOK_STRUCT,    // struct
-    TOK_TRUE,
+    TOK_TRUE,      // true
-    TOK_FALSE,
+    TOK_WHILE,     // while
    // Arithmetic ops.
    TOK_ADD,  // +
@@ -105,20 +105,20 @@ Str token_str[] = {
    [TOK_STRING] = cstr("STRING"),
    // Keywords.
-    [TOK_LET] = cstr("LET"),
+    [TOK_BREAK] = cstr("BREAK"),
-    [TOK_SET] = cstr("SET"),
+    [TOK_CASE] = cstr("CASE"),
+    [TOK_CONTINUE] = cstr("CONTINUE"),
+    [TOK_FALSE] = cstr("FALSE"),
    [TOK_FUN] = cstr("FUN"),
-    [TOK_STRUCT] = cstr("STRUCT"),
    [TOK_IF] = cstr("IF"),
+    [TOK_LET] = cstr("LET"),
    [TOK_MATCH] = cstr("MATCH"),
-    [TOK_CASE] = cstr("CASE"),
-    [TOK_WHILE] = cstr("WHILE"),
-    [TOK_CONTINUE] = cstr("CONTINUE"),
-    [TOK_BREAK] = cstr("BREAK"),
-    [TOK_RETURN] = cstr("RETURN"),
    [TOK_NIL] = cstr("NIL"),
+    [TOK_RETURN] = cstr("RETURN"),
+    [TOK_SET] = cstr("SET"),
+    [TOK_STRUCT] = cstr("STRUCT"),
    [TOK_TRUE] = cstr("TRUE"),
-    [TOK_FALSE] = cstr("FALSE"),
+    [TOK_WHILE] = cstr("WHILE"),
    // Arithmetic ops.
    [TOK_ADD] = cstr("ADD"),
@@ -319,7 +319,7 @@ emit_token_number(Scanner *scanner) {
        scan_next(scanner);
        while (scan_has_next(scanner)) {
            c = scan_peek(scanner);
-            if (c == '0' || c == '1') {
+            if (c == '0' || c == '1' || c == '_') {
                scan_next(scanner);
                continue;
            }
@@ -336,7 +336,7 @@ emit_token_number(Scanner *scanner) {
        while (scan_has_next(scanner)) {
            c = scan_peek(scanner);
            if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
-                (c >= 'A' && c <= 'F')) {
+                (c >= 'A' && c <= 'F') || c == '_') {
                scan_next(scanner);
                continue;
            }
@@ -355,7 +355,7 @@ emit_token_number(Scanner *scanner) {
                scan_next(scanner);
                break;
            }
-            if (c >= '0' && c <= '9') {
+            if ((c >= '0' && c <= '9') || c == '_') {
                scan_next(scanner);
                continue;
            }
@@ -377,7 +377,7 @@ emit_token_number(Scanner *scanner) {
                scan_next(scanner);
                break;
            }
-            if (c >= '0' && c <= '9') {
+            if ((c >= '0' && c <= '9') || c == '_') {
                scan_next(scanner);
                continue;
            }
@@ -394,7 +394,7 @@ emit_token_number(Scanner *scanner) {
        }
        while (scan_has_next(scanner)) {
            c = scan_peek(scanner);
-            if (c >= '0' && c <= '9') {
+            if ((c >= '0' && c <= '9') || c == '_') {
                scan_next(scanner);
                continue;
            }
@@ -539,29 +539,80 @@ scan_token(Scanner *scanner) {
        return emit_token_number(scanner);
    }
-    // TODO: keywords & literals
-    // Basic literals.
-    // TOK_SYMBOL,
-    // // Keywords.
-    // TOK_LET,
-    // TOK_SET,
-    // TOK_FUN,
-    // TOK_STRUCT,
-    // TOK_IF,
-    // TOK_MATCH,
-    // TOK_CASE,
-    // TOK_WHILE,
-    // TOK_CONTINUE,
-    // TOK_BREAK,
-    // TOK_RETURN,
-    // TOK_NIL,
-    // TOK_TRUE,
-    // TOK_FALSE,
-    // At this point we have an error, skip until we find whitespace again.
    scan_skip_until_valid(scanner);
-    return emit_token_err(&current, cstr("unexpected character"));
+    Str val = current.str;
+    val.size = current.str.size - scanner->str.size;
+    val.size = val.size < 0 ? 0 : val.size;
+    if (val.size == 0) {
+        return emit_token_err(&current, cstr("unexpected character"));
+    }
+    switch (val.mem[0]) {
+        case 'b': {
+            if (str_has_prefix(val, cstr("break"))) {
+                return emit_token(current, scanner, TOK_BREAK);
+            }
+        } break;
+        case 'c': {
+            if (str_has_prefix(val, cstr("case"))) {
+                return emit_token(current, scanner, TOK_CASE);
+            }
+            if (str_has_prefix(val, cstr("continue"))) {
+                return emit_token(current, scanner, TOK_CONTINUE);
+            }
+        } break;
+        case 'f': {
+            if (str_has_prefix(val, cstr("false"))) {
+                return emit_token(current, scanner, TOK_FALSE);
+            }
+            if (str_has_prefix(val, cstr("fun"))) {
+                return emit_token(current, scanner, TOK_FUN);
+            }
+        } break;
+        case 'i': {
+            if (str_has_prefix(val, cstr("if"))) {
+                return emit_token(current, scanner, TOK_IF);
+            }
+        } break;
+        case 'l': {
+            if (str_has_prefix(val, cstr("let"))) {
+                return emit_token(current, scanner, TOK_LET);
+            }
+        } break;
+        case 'm': {
+            if (str_has_prefix(val, cstr("match"))) {
+                return emit_token(current, scanner, TOK_MATCH);
+            }
+        } break;
+        case 'n': {
+            if (str_has_prefix(val, cstr("nil"))) {
+                return emit_token(current, scanner, TOK_NIL);
+            }
+        } break;
+        case 'r': {
+            if (str_has_prefix(val, cstr("return"))) {
+                return emit_token(current, scanner, TOK_RETURN);
+            }
+        } break;
+        case 's': {
+            if (str_has_prefix(val, cstr("set"))) {
+                return emit_token(current, scanner, TOK_SET);
+            }
+            if (str_has_prefix(val, cstr("struct"))) {
+                return emit_token(current, scanner, TOK_STRUCT);
+            }
+        } break;
+        case 't': {
+            if (str_has_prefix(val, cstr("true"))) {
+                return emit_token(current, scanner, TOK_TRUE);
+            }
+        } break;
+        case 'w': {
+            if (str_has_prefix(val, cstr("while"))) {
+                return emit_token(current, scanner, TOK_WHILE);
+            }
+        } break;
+    }
+    return emit_token(current, scanner, TOK_SYMBOL);
 }
 void
diff --git a/tests/literals.bad b/tests/literals.bad
index 673494d..e958bba 100644
--- a/tests/literals.bad
+++ b/tests/literals.bad
@@ -23,7 +23,6 @@
 2.45e5      ; we can use scientific notation
 +1.23e+6    ; +/- can be on the number and/or the exponent
 -3.14e-1    ; the exponents are always integers
-3.21e+0xff ; ... in any of its forms
 ; Booleans.
 true
author	Bad Diode <bd@badd10de.dev>	2024-06-15 16:10:16 +0200
committer	Bad Diode <bd@badd10de.dev>	2024-06-15 16:10:16 +0200
commit	893b52223d274c675272cee55768a9d5853420fb (patch)
tree	e9b57ec842cf622c7b50ec7f1cfb29914e7a7251
parent	99b92b160af5b9475262676aef7dd7f209429298 (diff)
download	bdl-893b52223d274c675272cee55768a9d5853420fb.tar.gz bdl-893b52223d274c675272cee55768a9d5853420fb.zip