#include #include #include #include "badlib.h" typedef enum ExecMode { RUN_NORMAL, PRINT_LEX, PRINT_PARSE, PRINT_SEMANTIC, PRINT_SYMTABLES, } ExecMode; static ExecMode mode = RUN_NORMAL; #define LEXER_MEM GB(2) void init(void) { log_init_default(); } typedef enum TokenType { TOK_UNKNOWN = 0, // Parentheses. TOK_LPAREN, // ( TOK_RPAREN, // ) TOK_LSQUARE, // [ TOK_RSQUARE, // ] TOK_LCURLY, // { TOK_RCURLY, // } // Basic literals. TOK_NUMBER, TOK_SYMBOL, TOK_STRING, // Keywords. TOK_BREAK, // break TOK_CASE, // case TOK_CONTINUE, // continue TOK_FALSE, // false TOK_FUN, // fun TOK_IF, // if TOK_LET, // let TOK_MATCH, // match TOK_NIL, // nil TOK_RETURN, // return TOK_SET, // set TOK_STRUCT, // struct TOK_TRUE, // true TOK_WHILE, // while // Arithmetic ops. TOK_ADD, // + TOK_SUB, // - TOK_MUL, // * TOK_DIV, // / TOK_MOD, // % // Logical ops. TOK_NOT, // ! TOK_AND, // && TOK_OR, // || TOK_EQ, // == TOK_NOTEQ, // != TOK_LT, // < TOK_GT, // > TOK_LE, // <= TOK_GE, // >= // Bitwise ops. TOK_BITNOT, // ~ TOK_BITAND, // & TOK_BITOR, // | TOK_BITLSHIFT, // << TOK_BITRSHIFT, // >> // Special ops. TOK_COLON, // : TOK_DOT, // . TOK_AT, // @ TOK_ASSIGN, // = // End of file. TOK_EOF, } TokenType; Str token_str[] = { [TOK_UNKNOWN] = cstr("UNKNOWN"), // Parentheses. [TOK_LPAREN] = cstr("LPAREN"), [TOK_RPAREN] = cstr("RPAREN"), [TOK_LSQUARE] = cstr("LSQUARE"), [TOK_RSQUARE] = cstr("RSQUARE"), [TOK_LCURLY] = cstr("LCURLY"), [TOK_RCURLY] = cstr("RCURLY"), // Basic literals. [TOK_NUMBER] = cstr("NUMBER"), [TOK_SYMBOL] = cstr("SYMBOL"), [TOK_STRING] = cstr("STRING"), // Keywords. [TOK_BREAK] = cstr("BREAK"), [TOK_CASE] = cstr("CASE"), [TOK_CONTINUE] = cstr("CONTINUE"), [TOK_FALSE] = cstr("FALSE"), [TOK_FUN] = cstr("FUN"), [TOK_IF] = cstr("IF"), [TOK_LET] = cstr("LET"), [TOK_MATCH] = cstr("MATCH"), [TOK_NIL] = cstr("NIL"), [TOK_RETURN] = cstr("RETURN"), [TOK_SET] = cstr("SET"), [TOK_STRUCT] = cstr("STRUCT"), [TOK_TRUE] = cstr("TRUE"), [TOK_WHILE] = cstr("WHILE"), // Arithmetic ops. [TOK_ADD] = cstr("ADD"), [TOK_SUB] = cstr("SUB"), [TOK_MUL] = cstr("MUL"), [TOK_DIV] = cstr("DIV"), [TOK_MOD] = cstr("MOD"), // Logical ops. [TOK_NOT] = cstr("NOT"), [TOK_AND] = cstr("AND"), [TOK_OR] = cstr("OR"), [TOK_EQ] = cstr("EQ"), [TOK_NOTEQ] = cstr("NOTEQ"), [TOK_LT] = cstr("LT"), [TOK_GT] = cstr("GT"), [TOK_LE] = cstr("LE"), [TOK_GE] = cstr("GE"), // Bitwise ops. [TOK_BITNOT] = cstr("BITNOT"), [TOK_BITAND] = cstr("BITAND"), [TOK_BITOR] = cstr("BITOR"), [TOK_BITLSHIFT] = cstr("BITLSHIFT"), [TOK_BITRSHIFT] = cstr("BITRSHIFT"), // Special ops. [TOK_COLON] = cstr("COLON"), [TOK_DOT] = cstr("DOT"), [TOK_AT] = cstr("AT"), [TOK_ASSIGN] = cstr("ASSIGN"), // End of file. [TOK_EOF] = cstr("EOF"), }; typedef struct Token { TokenType type; Str val; sz line; sz col; } Token; typedef struct Scanner { Str str; sz line; sz col; Arena *storage; } Scanner; char scan_next(Scanner *scanner) { char c = str_next(&scanner->str); if (c == '\n') { scanner->line++; scanner->col = 0; } else { scanner->col++; } return c; } bool scan_has_next(Scanner *scanner) { return scanner->str.size; } char scan_peek(Scanner *scanner) { return str_peek(scanner->str); } Token emit_token(Scanner current, Scanner *scanner, TokenType t) { Str val = current.str; val.size = current.str.size - scanner->str.size; val.size = val.size < 0 ? 0 : val.size; return (Token){ .val = val, .line = current.line + 1, .col = current.col + 1, .type = t, }; } Token emit_token_err(Scanner *scanner, Str err_msg) { return (Token){ .line = scanner->line + 1, .col = scanner->col + 1, .val = err_msg, .type = TOK_UNKNOWN, }; } void scan_skip_line(Scanner *scanner) { SearchResult newline = array_find_next(scanner->str, cstr("\n")); if (newline.found) { scanner->str.mem += newline.pos + 1; scanner->str.size -= newline.pos + 1; scanner->line++; scanner->col = 0; } } void scan_skip_whitespace(Scanner *scanner) { while (scan_has_next(scanner)) { char c = scan_peek(scanner); switch (c) { case ' ': case ',': // Commas are just syntactic sugar. case '\f': case '\n': case '\r': case '\t': case '\v': { scan_next(scanner); } break; case ';': { // Found a comment! (skip) scan_skip_line(scanner); } break; default: { return; } break; } } } bool is_valid_split(char c) { switch (c) { case ';': case '(': case ')': case '[': case ']': case '{': case '}': case '+': case '-': case '*': case '/': case '%': case '!': case '=': case '<': case '>': case '~': case '&': case '|': case ':': case '.': case '@': case '"': case ' ': case ',': case '\f': case '\n': case '\r': case '\t': case '\v': { return true; } break; } return false; } void scan_skip_until_valid(Scanner *scanner) { while (scan_has_next(scanner)) { char c = scan_peek(scanner); if (is_valid_split(c)) { return; } scan_next(scanner); } } Token emit_token_number(Scanner *scanner) { Scanner current = *scanner; char c = scan_peek(scanner); if (c == '+' || c == '-') { scan_next(scanner); if (str_has_prefix(scanner->str, cstr("0b")) || str_has_prefix(scanner->str, cstr("0x"))) { scan_skip_until_valid(scanner); return emit_token_err( ¤t, cstr("malformed number: binary/hex numbers can't be signed")); } } if (str_has_prefix(scanner->str, cstr("0b"))) { scan_next(scanner); scan_next(scanner); while (scan_has_next(scanner)) { c = scan_peek(scanner); if (c == '0' || c == '1' || c == '_') { scan_next(scanner); continue; } if (is_valid_split(c)) { return emit_token(current, scanner, TOK_NUMBER); } scan_skip_until_valid(scanner); return emit_token_err( ¤t, cstr("malformed number: invalid binary number")); } } else if (str_has_prefix(scanner->str, cstr("0x"))) { scan_next(scanner); scan_next(scanner); while (scan_has_next(scanner)) { c = scan_peek(scanner); if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || c == '_') { scan_next(scanner); continue; } if (is_valid_split(c)) { return emit_token(current, scanner, TOK_NUMBER); } scan_skip_until_valid(scanner); return emit_token_err(¤t, cstr("malformed number: invalid hex number")); } } else { // Integral. while (scan_has_next(scanner)) { c = scan_peek(scanner); if (c == '.') { scan_next(scanner); break; } if ((c >= '0' && c <= '9') || c == '_') { scan_next(scanner); continue; } if (is_valid_split(c)) { return emit_token(current, scanner, TOK_NUMBER); } scan_skip_until_valid(scanner); return emit_token_err(¤t, cstr("malformed number")); } c = scan_peek(scanner); if (!(c >= '0' && c <= '9')) { return emit_token_err(¤t, cstr("malformed number: no decimal digits")); } // Decimals. while (scan_has_next(scanner)) { c = scan_peek(scanner); if (c == 'e' || c == 'E') { scan_next(scanner); break; } if ((c >= '0' && c <= '9') || c == '_') { scan_next(scanner); continue; } if (is_valid_split(c)) { return emit_token(current, scanner, TOK_NUMBER); } scan_skip_until_valid(scanner); return emit_token_err(¤t, cstr("malformed number")); } // Exponent. c = scan_peek(scanner); if (c == '+' || c == '-') { scan_next(scanner); } while (scan_has_next(scanner)) { c = scan_peek(scanner); if ((c >= '0' && c <= '9') || c == '_') { scan_next(scanner); continue; } if (c == '.') { scan_next(scanner); return emit_token_err( ¤t, cstr("malformed number: decimals not allowed on exponent")); } if (is_valid_split(c)) { return emit_token(current, scanner, TOK_NUMBER); } scan_skip_until_valid(scanner); return emit_token_err(¤t, cstr("malformed number")); } } return emit_token_err(¤t, cstr("malformed number")); } Token scan_token(Scanner *scanner) { assert(scanner); scan_skip_whitespace(scanner); if (!scan_has_next(scanner)) { return emit_token(*scanner, scanner, TOK_EOF); } Scanner current = *scanner; char c = scan_next(scanner); switch (c) { case '(': return emit_token(current, scanner, TOK_LPAREN); case ')': return emit_token(current, scanner, TOK_RPAREN); case '[': return emit_token(current, scanner, TOK_LSQUARE); case ']': return emit_token(current, scanner, TOK_RSQUARE); case '{': return emit_token(current, scanner, TOK_LCURLY); case '}': return emit_token(current, scanner, TOK_RCURLY); case '+': { char p = scan_peek(scanner); if (p >= '0' && p <= '9') { *scanner = current; return emit_token_number(scanner); } return emit_token(current, scanner, TOK_ADD); }; case '-': { char p = scan_peek(scanner); if (p >= '0' && p <= '9') { *scanner = current; return emit_token_number(scanner); } return emit_token(current, scanner, TOK_ADD); }; case '*': return emit_token(current, scanner, TOK_MUL); case '/': return emit_token(current, scanner, TOK_DIV); case '%': return emit_token(current, scanner, TOK_MOD); case '!': { if (scan_peek(scanner) == '=') { scan_next(scanner); return emit_token(current, scanner, TOK_NOTEQ); } return emit_token(current, scanner, TOK_NOT); }; case '=': { if (scan_peek(scanner) == '=') { scan_next(scanner); return emit_token(current, scanner, TOK_EQ); } return emit_token(current, scanner, TOK_ASSIGN); }; case '<': { char p = scan_peek(scanner); if (p == '=') { scan_next(scanner); return emit_token(current, scanner, TOK_LE); } if (p == '<') { scan_next(scanner); return emit_token(current, scanner, TOK_BITLSHIFT); } return emit_token(current, scanner, TOK_LT); }; case '>': { char p = scan_peek(scanner); if (p == '=') { scan_next(scanner); return emit_token(current, scanner, TOK_GE); } if (p == '>') { scan_next(scanner); return emit_token(current, scanner, TOK_BITRSHIFT); } return emit_token(current, scanner, TOK_GT); }; case '~': return emit_token(current, scanner, TOK_BITNOT); case '&': { if (scan_peek(scanner) == '&') { scan_next(scanner); return emit_token(current, scanner, TOK_AND); } return emit_token(current, scanner, TOK_BITAND); }; case '|': { if (scan_peek(scanner) == '|') { scan_next(scanner); return emit_token(current, scanner, TOK_OR); } return emit_token(current, scanner, TOK_BITOR); }; case ':': return emit_token(current, scanner, TOK_COLON); case '.': return emit_token(current, scanner, TOK_DOT); case '@': return emit_token(current, scanner, TOK_AT); case '"': { while (scan_has_next(scanner)) { c = scan_next(scanner); if (c == '\\') { scan_next(scanner); continue; } if (c == '"') { return emit_token(current, scanner, TOK_STRING); } } return emit_token_err(¤t, cstr("mismatched string quotes")); }; } if (c >= '0' && c <= '9') { *scanner = current; return emit_token_number(scanner); } scan_skip_until_valid(scanner); Str val = current.str; val.size = current.str.size - scanner->str.size; val.size = val.size < 0 ? 0 : val.size; if (val.size == 0) { return emit_token_err(¤t, cstr("unexpected character")); } switch (val.mem[0]) { case 'b': { if (str_has_prefix(val, cstr("break"))) { return emit_token(current, scanner, TOK_BREAK); } } break; case 'c': { if (str_has_prefix(val, cstr("case"))) { return emit_token(current, scanner, TOK_CASE); } if (str_has_prefix(val, cstr("continue"))) { return emit_token(current, scanner, TOK_CONTINUE); } } break; case 'f': { if (str_has_prefix(val, cstr("false"))) { return emit_token(current, scanner, TOK_FALSE); } if (str_has_prefix(val, cstr("fun"))) { return emit_token(current, scanner, TOK_FUN); } } break; case 'i': { if (str_has_prefix(val, cstr("if"))) { return emit_token(current, scanner, TOK_IF); } } break; case 'l': { if (str_has_prefix(val, cstr("let"))) { return emit_token(current, scanner, TOK_LET); } } break; case 'm': { if (str_has_prefix(val, cstr("match"))) { return emit_token(current, scanner, TOK_MATCH); } } break; case 'n': { if (str_has_prefix(val, cstr("nil"))) { return emit_token(current, scanner, TOK_NIL); } } break; case 'r': { if (str_has_prefix(val, cstr("return"))) { return emit_token(current, scanner, TOK_RETURN); } } break; case 's': { if (str_has_prefix(val, cstr("set"))) { return emit_token(current, scanner, TOK_SET); } if (str_has_prefix(val, cstr("struct"))) { return emit_token(current, scanner, TOK_STRUCT); } } break; case 't': { if (str_has_prefix(val, cstr("true"))) { return emit_token(current, scanner, TOK_TRUE); } } break; case 'w': { if (str_has_prefix(val, cstr("while"))) { return emit_token(current, scanner, TOK_WHILE); } } break; } return emit_token(current, scanner, TOK_SYMBOL); } void process_file(Str path) { Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); FileContents file = platform_read_file(path, &lexer_arena); if (file.err) { printf("file.err: %d\n", file.err); eprintln("%s: error: %s", path, cstr("WOT")); return; } Scanner scanner = { .str = file.data, .storage = &lexer_arena, }; Token tok = {0}; while (tok.type != TOK_EOF) { tok = scan_token(&scanner); eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type], tok.val); } // while (true) { // Token tok = scan_token(&scanner); // println("%s:%d:%d:%s %s", path, tok.line, tok.col, // token_str[tok.type], // tok.val); // if (tok.type == TOK_EOF) break; // } // Str scanner = file.data; // // NOTE: Testing file read line by line. // for (sz i = 0; scanner.size != 0; i++) { // Str line = str_split(&scanner, cstr("\n")); // println("%x{4} %s", i + 1, line); // } // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345, // cstr("BOOM!"), &logger_inf.storage); // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string // logger"), 'X'); while (true) {} // TODO: run lexer. // Free up resources. arena_destroy(&lexer_arena, os_allocator); } #ifndef BIN_NAME #define BIN_NAME "bdl" #endif void print_usage(void) { printf("Usage: %s [options] \n", BIN_NAME); printf("\n"); printf("\t-h \t\tShow usage.\n"); printf( "\t-p [l|p|s|t]\tPrint mode for [l]exing, [p]arsing, [s]emantic " "analysis, symbol [t]ables\n"); printf("\n"); } int main(int argc, char *argv[]) { int option; while ((option = getopt(argc, argv, "hp:")) != -1) { switch (option) { case 'h': { print_usage(); goto exit_success; } break; case 'p': { if (optarg[0] == 'l' && optarg[1] == '\0') { mode = PRINT_LEX; } else if (optarg[0] == 'p' && optarg[1] == '\0') { mode = PRINT_PARSE; } else if (optarg[0] == 's' && optarg[1] == '\0') { mode = PRINT_SEMANTIC; } else if (optarg[0] == 't' && optarg[1] == '\0') { mode = PRINT_SYMTABLES; } else { print_usage(); return EXIT_FAILURE; } } break; default: { print_usage(); return EXIT_FAILURE; } break; } } init(); // Run from stdin. if (optind == argc) { // TODO: REPL // repl(); goto exit_success; } // Run from file. while (optind < argc) { char *file_name = argv[optind]; Str file_path = STR(file_name); process_file(file_path); optind++; } exit_success: return EXIT_SUCCESS; }