#include "lexer.h"
#include "errors.h"

#include <stdio.h>

/* Printable name for each TokenType, indexed by enum value (for diagnostics). */
static const char *token_str[] = {
    [TOKEN_UNKNOWN] = "UNKNOWN",
    [TOKEN_LPAREN] = "LPAREN",
    [TOKEN_RPAREN] = "RPAREN",
    [TOKEN_LSQUARE] = "LSQUARE",
    [TOKEN_RSQUARE] = "RSQUARE",
    [TOKEN_LCURLY] = "LCURLY",
    [TOKEN_RCURLY] = "RCURLY",
    [TOKEN_NUMBER] = "NUMBER",
    [TOKEN_SYMBOL] = "SYMBOL",
    [TOKEN_STRING] = "STRING",
    [TOKEN_NIL] = "NIL",
    [TOKEN_TRUE] = "TRUE",
    [TOKEN_FALSE] = "FALSE",
    [TOKEN_LAMBDA] = "LAMBDA",
    [TOKEN_IF] = "IF",
    [TOKEN_DEF] = "DEF",
    [TOKEN_SET] = "SET",
    [TOKEN_FUN] = "FUN",
    [TOKEN_STRUCT] = "STRUCT",
    [TOKEN_ADD] = "ADD",
    [TOKEN_SUB] = "SUB",
    [TOKEN_MUL] = "MUL",
    [TOKEN_DIV] = "DIV",
    [TOKEN_MOD] = "MOD",
    [TOKEN_NOT] = "NOT",
    [TOKEN_AND] = "AND",
    [TOKEN_OR] = "OR",
    [TOKEN_EQ] = "EQ",
    [TOKEN_LT] = "LT",
    [TOKEN_GT] = "GT",
    [TOKEN_LE] = "LE",
    [TOKEN_GE] = "GE",
    [TOKEN_COLON] = "COLON",
    [TOKEN_DOT] = "DOT",
    [TOKEN_AT] = "AT",
    [TOKEN_EOF] = "EOF",
};

/* A reserved word (or operator spelling) and the token it lexes to. */
typedef struct Keyword {
    char *str;       /* spelling, NUL-terminated */
    size_t n;        /* spelling length, excluding the NUL */
    TokenType token; /* token type produced on an exact match */
} Keyword;

/* sizeof(STR) - 1 computes the literal's length at compile time. */
#define KEYWORD(STR, TOK) {(STR), sizeof(STR) - 1, (TOK)}

static const Keyword keywords[] = {
    KEYWORD("nil", TOKEN_NIL),
    KEYWORD("true", TOKEN_TRUE),
    KEYWORD("false", TOKEN_FALSE),
    KEYWORD("lambda", TOKEN_LAMBDA),
    KEYWORD("if", TOKEN_IF),
    KEYWORD("def", TOKEN_DEF),
    KEYWORD("set", TOKEN_SET),
    KEYWORD("fun", TOKEN_FUN),
    KEYWORD("struct", TOKEN_STRUCT),
    KEYWORD("+", TOKEN_ADD),
    KEYWORD("-", TOKEN_SUB),
    KEYWORD("*", TOKEN_MUL),
    KEYWORD("/", TOKEN_DIV),
    KEYWORD("%", TOKEN_MOD),
    KEYWORD("not", TOKEN_NOT),
    KEYWORD("and", TOKEN_AND),
    KEYWORD("or", TOKEN_OR),
    KEYWORD("=", TOKEN_EQ),
    KEYWORD("<", TOKEN_LT),
    KEYWORD(">", TOKEN_GT),
    KEYWORD("<=", TOKEN_LE),
    KEYWORD(">=", TOKEN_GE),
};

/* Print one token as "[line:col] TYPE" (plus " -> value" for valued tokens). */
void print_token(Token tok)
{
    /* BUGFIX: the original used %ld for these fields, which are assigned from
     * size_t locals in tokenize(); %zu with an explicit cast is correct
     * regardless of the exact integer type declared in lexer.h. */
    printf("[%4zu:%-4zu] ", (size_t)tok.line, (size_t)tok.col);
    printf("%s", token_str[tok.type]);
    switch (tok.type) {
    case TOKEN_NUMBER:
    case TOKEN_SYMBOL:
    case TOKEN_STRING: {
        printf(" -> ");
        sv_write(&tok.value);
    } break;
    default: {
    } break;
    }
    printf("\n");
}

/* Consume and return the next character, updating line/col/offset bookkeeping. */
char scan_next(Scanner *scanner)
{
    char c = sv_next(&scanner->current);
    if (c == '\n') {
        scanner->line_number++;
        scanner->col_number = 1;
    } else {
        scanner->col_number++;
    }
    scanner->offset++;
    return c;
}

/* Undo the most recent scan_next. */
void scan_rewind(Scanner *scanner)
{
    sv_rewind(&scanner->current);
    scanner->offset--;
    /* BUGFIX: mirror scan_next's column bookkeeping; the original left the
     * column one too high after every rewind.
     * NOTE(review): a rewind across a '\n' cannot restore line/col exactly —
     * assumes callers never rewind past a newline; confirm at call sites. */
    if (scanner->col_number > 1) {
        scanner->col_number--;
    }
}

/* Look at the next character without consuming it. */
char scan_peek(const Scanner *scanner)
{
    return sv_peek(&scanner->current);
}

/* True while unread characters remain. */
bool scan_has_next(const Scanner *scanner)
{
    return scanner->current.n != 0;
}

/* Consume consecutive whitespace characters, if any. */
void skip_whitespace(Scanner *scanner)
{
    while (scan_has_next(scanner)) {
        char c = scan_peek(scanner);
        switch (c) {
        case ' ':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
        case '\v': {
            scan_next(scanner);
        } break;
        default: {
            return;
        } break;
        }
    }
}

/* True for any character that terminates a number/symbol token. */
bool is_delimiter(char c)
{
    switch (c) {
    case EOF:
    case '\0':
    case ';':
    case '"':
    case '\'':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ':':
    case '@':
    case ' ':
    case '\f':
    case '\n':
    case '\r':
    case '\t':
    case '\v': {
        return true;
    } break;
    }
    return false;
}

/* Map a scanned word to its keyword token, or TOKEN_SYMBOL if it is not one. */
TokenType find_token_type(const StringView value)
{
    for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) {
        StringView keyword = (StringView){keywords[i].str, keywords[i].n};
        if (sv_equal(&value, &keyword)) {
            return keywords[i].token;
        }
    }
    return TOKEN_SYMBOL;
}

/* Dump an entire token array, one token per line. */
void print_tokens(Token *tokens)
{
    for (size_t i = 0; i < array_size(tokens); i++) {
        print_token(tokens[i]);
    }
}

/*
 * Tokenize the whole input into a dynamic array of tokens (caller owns it).
 * On a lexing error, pushes a diagnostic via push_error and returns the
 * tokens produced so far. The returned stream always ends in TOKEN_EOF.
 * String/number/symbol token values are views into the input buffer, so the
 * input must outlive the returned tokens.
 */
Token *tokenize(const StringView *sv)
{
    Token *tokens = NULL;
    array_init(tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };
    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        /* BUGFIX: trailing whitespace can exhaust the input; the original
         * called scan_next on an empty scanner in that case. */
        if (!scan_has_next(&scanner)) {
            break;
        }
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        Token token = (Token){
            .type = TOKEN_UNKNOWN,
            .line = line,
            .col = col,
        };
        char c = scan_next(&scanner);
        switch (c) {
        case ';': {
            /* Line comment: discard through end of line (or end of input). */
            while (scan_has_next(&scanner)) {
                c = scan_next(&scanner);
                if (c == '\n' || c == '\0') {
                    break;
                }
            }
            continue;
        } break;
        case '"': {
            char prev = c;
            bool found = false;
            size_t n = 0;
            /* Scan to the closing quote; a quote preceded by '\\' is escaped. */
            while (scan_has_next(&scanner)) {
                c = scan_next(&scanner);
                if (c == '"' && prev != '\\') {
                    found = true;
                    break;
                }
                prev = c;
                n++;
            }
            if (!found) {
                push_error(ERR_TYPE_LEXER, ERR_UNMATCHED_STRING, line, col);
                return tokens;
            }
            /* Value excludes both quotes: start one past the opening '"'. */
            token.value = (StringView){
                .start = &sv->start[offset + 1],
                .n = n,
            };
            token.type = TOKEN_STRING;
        } break;
        case '(': {
            token.type = TOKEN_LPAREN;
        } break;
        case ')': {
            token.type = TOKEN_RPAREN;
        } break;
        case '[': {
            token.type = TOKEN_LSQUARE;
        } break;
        case ']': {
            token.type = TOKEN_RSQUARE;
        } break;
        case '{': {
            token.type = TOKEN_LCURLY;
        } break;
        case '}': {
            token.type = TOKEN_RCURLY;
        } break;
        case ':': {
            token.type = TOKEN_COLON;
        } break;
        case '.': {
            token.type = TOKEN_DOT;
        } break;
        case '@': {
            token.type = TOKEN_AT;
        } break;
        default: {
            if (c == EOF || c == '\0') {
                token.type = TOKEN_EOF;
                break;
            }
            size_t n = 1;
            /* A digit starts a number; '+'/'-' start one only when followed
             * by a non-delimiter (otherwise they lex as keyword operators). */
            bool num = (c >= '0' && c <= '9');
            num = num || ((c == '-' || c == '+') && scan_has_next(&scanner) &&
                          !is_delimiter(scan_peek(&scanner)));
            if (num) {
                while (scan_has_next(&scanner) &&
                       !is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                token.value = (StringView){
                    .start = &sv->start[offset],
                    .n = n,
                };
                token.type = TOKEN_NUMBER;
            } else {
                /* '.' additionally terminates symbols (field access). */
                while (scan_has_next(&scanner) &&
                       !is_delimiter(scan_peek(&scanner))) {
                    if (scan_peek(&scanner) == '.') {
                        break;
                    }
                    scan_next(&scanner);
                    n++;
                }
                token.value = (StringView){
                    .start = &sv->start[offset],
                    .n = n,
                };
                token.type = find_token_type(token.value);
            }
        } break;
        }
        if (token.type == TOKEN_UNKNOWN) {
            push_error(ERR_TYPE_LEXER, ERR_UNKNOWN_TOK_TYPE, line, col);
            return tokens;
        }
        array_push(tokens, token);
    }
    /* BUGFIX: the original only emitted TOKEN_EOF when the input contained an
     * explicit '\0'/EOF byte; guarantee the sentinel so consumers can rely on
     * it, without duplicating one that was already produced. */
    if (array_size(tokens) == 0 ||
        tokens[array_size(tokens) - 1].type != TOKEN_EOF) {
        Token eof = (Token){
            .type = TOKEN_EOF,
            .line = scanner.line_number,
            .col = scanner.col_number,
        };
        array_push(tokens, eof);
    }
    return tokens;
}