From eeff5e273f22aa28e81ab080e9ffdce85ac394b8 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 22 Oct 2021 09:59:31 +0200 Subject: Prepare skeleton for bytecode interpreter --- src/bootstrap/lexer.c | 257 -------------------------------------------------- 1 file changed, 257 deletions(-) delete mode 100644 src/bootstrap/lexer.c (limited to 'src/bootstrap/lexer.c') diff --git a/src/bootstrap/lexer.c b/src/bootstrap/lexer.c deleted file mode 100644 index 38ca37c..0000000 --- a/src/bootstrap/lexer.c +++ /dev/null @@ -1,257 +0,0 @@ -#include "lexer.h" - -void -print_token(Token tok) { - printf("LINE: %3ld COL: %3ld ", tok.line, tok.column); - switch (tok.type) { - case TOKEN_LPAREN: { - printf("TOKEN_LPAREN"); - } break; - case TOKEN_RPAREN: { - printf("TOKEN_RPAREN"); - } break; - case TOKEN_QUOTE: { - printf("TOKEN_QUOTE"); - } break; - case TOKEN_TRUE: { - printf("TOKEN_TRUE"); - } break; - case TOKEN_FALSE: { - printf("TOKEN_FALSE"); - } break; - case TOKEN_NIL: { - printf("TOKEN_NIL"); - } break; - case TOKEN_FIXNUM: { - printf("TOKEN_FIXNUM -> "); - sv_write(&tok.value, stdout); - } break; - case TOKEN_SYMBOL: { - printf("TOKEN_SYMBOL -> "); - sv_write(&tok.value, stdout); - } break; - case TOKEN_STRING: { - printf("TOKEN_STRING -> "); - sv_write(&tok.value, stdout); - } break; - case TOKEN_EOF: { - printf("TOKEN_EOF"); - } break; - case TOKEN_UNKNOWN: { - printf("TOKEN_UNKNOWN"); - } break; - } - printf("\n"); -} - -char -scan_next(Scanner *scanner) { - char c = sv_next(&scanner->current); - if (c == '\n') { - scanner->line_number++; - scanner->col_number = 1; - } else { - scanner->col_number++; - } - scanner->offset++; - return c; -} - -char -scan_peek(const Scanner *scanner) { - return sv_peek(&scanner->current); -} - -bool -scan_has_next(const Scanner *scanner) { - return scanner->current.n != 0; -} - -void -skip_whitespace(Scanner *scanner) { - while (scan_has_next(scanner)) { - char c = scan_peek(scanner); - switch (c) { - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': { - scan_next(scanner); - } break; - default: { - return; - } break; - } - } -} - -bool -is_delimiter(char c) { - switch (c) { - case EOF: - case '\0': - case ';': - case '"': - case '\'': - case '(': - case ')': - case ' ': - case '\f': - case '\n': - case '\r': - case '\t': - case '\v': { - return true; - } break; - } - return false; -} - -TokenType -find_primitive_type(const StringView value) { - bool is_fixnum = true; - for (size_t i = 0; i < value.n; i++) { - char c = value.start[i]; - if (i == 0 && c == '-' && value.n > 1) { - continue; - } - if (!(c >= '0' && c <= '9')) { - is_fixnum = false; - break; - } - } - if (is_fixnum) { - return TOKEN_FIXNUM; - } - if (sv_equal(&value, &(StringView){"true", 4})) { - return TOKEN_TRUE; - } - if (sv_equal(&value, &(StringView){"false", 5})) { - return TOKEN_FALSE; - } - return TOKEN_SYMBOL; -} - -Token * -tokenize(const StringView *sv) { - Token *tokens = NULL; - array_init(tokens, 1); - Scanner scanner = (Scanner){ - .current = *sv, - .line_number = 1, - .col_number = 1, - }; - - while (scan_has_next(&scanner)) { - skip_whitespace(&scanner); - size_t line = scanner.line_number; - size_t col = scanner.col_number; - size_t offset = scanner.offset; - char c = scan_next(&scanner); - switch (c) { - case ';': { - while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} - } break; - case '"': { - char prev = c; - bool found = false; - size_t n = 0; - while (scan_has_next(&scanner)) { - c = scan_next(&scanner); - if (c == '"' && prev != '\\') { - found = true; - break; - } - prev = c; - n++; - } - if (!found) { - error_push((Error){ - .type = ERR_TYPE_LEXER, - .value = ERR_UNMATCHED_STRING, - .line = line, - .col = col, - }); - return tokens; - } - Token token = (Token){ - .value = (StringView){ - .start = &sv->start[offset + 1], - .n = n, - }, - .type = TOKEN_STRING, - .line = line, - .column = col, - }; - array_push(tokens, token); - } break; - case '\'': { - Token token = (Token){ - .type = TOKEN_QUOTE, - .line = line, - .column = col, - }; - array_push(tokens, token); - } break; - case '(': { - if (scan_peek(&scanner) == ')') { - scan_next(&scanner); - Token token = (Token){ - .type = TOKEN_NIL, - .line = line, - .column = col, - }; - array_push(tokens, token); - } else { - Token token = (Token){ - .type = TOKEN_LPAREN, - .line = line, - .column = col, - }; - array_push(tokens, token); - } - } break; - case ')': { - Token token = (Token){ - .type = TOKEN_RPAREN, - .line = line, - .column = col, - }; - array_push(tokens, token); - } break; - default: { - size_t n = 1; - while (!is_delimiter(scan_peek(&scanner))) { - scan_next(&scanner); - n++; - } - if (c == EOF || c == '\0') { - break; - } - Token token = (Token){ - .value = (StringView){ - .start = &sv->start[offset], - .n = n, - }, - .type = TOKEN_SYMBOL, - .line = line, - .column = col, - }; - token.type = find_primitive_type(token.value); - array_push(tokens, token); - } break; - } - } - - // Push EOF token. - Token token = (Token){ - .type = TOKEN_EOF, - .line = scanner.line_number, - .column = 1, - }; - array_push(tokens, token); - - return tokens; -} -- cgit v1.2.1