From c4765a539ee01625dd310a02f0be16ec9a64e2e4 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sat, 12 Feb 2022 16:21:19 +0100 Subject: Make keywords a static array for ease of lex --- src/lexer.c | 62 +++++++++++++++++++++++++++++++++++-------------------------- src/main.c | 3 --- 2 files changed, 36 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/lexer.c b/src/lexer.c index 80ef3ad..5175b1c 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -27,6 +27,26 @@ static const char* token_str[] = { [TOKEN_EOF] = "TOKEN_EOF", }; +typedef struct Keyword { + char *str; + size_t n; + TokenType token; +} Keyword; + +#define KEYWORD(STR,TOK) {(STR), sizeof(STR) - 1, (TOK)} + +static const Keyword keywords[] = { + KEYWORD("nil", TOKEN_NIL), + KEYWORD("true", TOKEN_TRUE), + KEYWORD("false", TOKEN_FALSE), + KEYWORD("lambda", TOKEN_LAMBDA), + KEYWORD("if", TOKEN_IF), + KEYWORD("def", TOKEN_DEF), + KEYWORD("set!", TOKEN_SET), + KEYWORD("fun", TOKEN_FUN), + KEYWORD("struct", TOKEN_STRUCT), +}; + void print_token(Token tok) { printf("[%4ld:%-4ld] ", tok.line, tok.col); @@ -121,11 +141,12 @@ is_delimiter(char c) { return false; } -#define TOKEN_IS_KEYWORD(VAL, KEYWORD) \ - sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1}) - size_t scan_number_token(Scanner *scanner) { + // TODO: This looks more like a parsing problem than a lexer one; + // consider moving it there. If it starts with `-` and there is no + // delimiter after, or if it starts with a digit, it is + // TOKEN_NUMBER. 
char first = scan_next(scanner); char second = scan_peek(scanner); size_t n = 1; @@ -183,16 +204,12 @@ scan_number_token(Scanner *scanner) { TokenType find_token_type(const StringView value) { - if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } - if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } - if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } - if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; } - if (TOKEN_IS_KEYWORD(value, "if")) { return TOKEN_IF; } - if (TOKEN_IS_KEYWORD(value, "def")) { return TOKEN_DEF; } - if (TOKEN_IS_KEYWORD(value, "set!")) { return TOKEN_SET; } - if (TOKEN_IS_KEYWORD(value, "fun")) { return TOKEN_FUN; } - if (TOKEN_IS_KEYWORD(value, "struct")) { return TOKEN_STRUCT; } - + for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { + StringView keyword = (StringView){keywords[i].str, keywords[i].n}; + if (sv_equal(&value, &keyword)) { + return keywords[i].token; + } + } return TOKEN_SYMBOL; } @@ -267,19 +284,12 @@ tokenize(const StringView *sv) { break; } size_t n = 1; - if (c == '-' && !is_delimiter(scan_peek(&scanner))) { - n += scan_number_token(&scanner); - token.value = (StringView){ - .start = &sv->start[offset], - .n = n, - }; - token.type = TOKEN_NUMBER; - } else if (c >= '0' && c <= '9') { - scan_rewind(&scanner); - n = scan_number_token(&scanner); - if (n == 0) { - push_error(ERR_TYPE_LEXER, ERR_MALFORMED_NUMBER, line, col); - return tokens; + bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); + is_number = is_number || (c >= '0' && c <= '9'); + if (is_number) { + while (!is_delimiter(scan_peek(&scanner))) { + c = scan_next(&scanner); + n++; } token.value = (StringView){ .start = &sv->start[offset], diff --git a/src/main.c b/src/main.c index 17dd481..31fb5da 100644 --- a/src/main.c +++ b/src/main.c @@ -27,9 +27,6 @@ process_source(const StringView *source, const char *file_name) { Token *tokens = tokenize(source); print_tokens(tokens); check_errors(file_name); - // 
if (errors.n != 0) { - // exit(EXIT_FAILURE); - // } // // Parser. // Program program = parse(tokens, &errors); -- cgit v1.2.1