#include "lexer.h"
#include "errors.h"

// Printable name for each token type, indexed by the token type value.
static const char* token_str[] = {
    [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
    [TOKEN_LPAREN] = "TOKEN_LPAREN",
    [TOKEN_RPAREN] = "TOKEN_RPAREN",
    [TOKEN_FIXNUM] = "TOKEN_FIXNUM",
    [TOKEN_SYMBOL] = "TOKEN_SYMBOL",
    [TOKEN_STRING] = "TOKEN_STRING",
    [TOKEN_NIL] = "TOKEN_NIL",
    [TOKEN_TRUE] = "TOKEN_TRUE",
    [TOKEN_FALSE] = "TOKEN_FALSE",
    [TOKEN_LAMBDA] = "TOKEN_LAMBDA",
    [TOKEN_IF] = "TOKEN_IF",
    [TOKEN_DEF] = "TOKEN_DEF",
    [TOKEN_SET] = "TOKEN_SET",
    [TOKEN_FUN] = "TOKEN_FUN",
    [TOKEN_EOF] = "TOKEN_EOF",
};

// Print a one-line description of `tok`: its position as "[line:col]", the
// name of its type and, for fixnum/symbol/string tokens, " -> " followed by
// the token text (emitted through sv_write).
void
print_token(Token tok) {
    // NOTE(review): %ld assumes tok.line / tok.col are `long`; confirm
    // against the Token definition (tokenize() fills them from size_t).
    printf("[%4ld:%-4ld] ", tok.line, tok.col);
    printf("%s", token_str[tok.type]);
    switch (tok.type) {
        // Only these token types carry text worth printing.
        case TOKEN_FIXNUM:
        case TOKEN_SYMBOL:
        case TOKEN_STRING: {
            printf(" -> ");
            sv_write(&tok.value);
        } break;
        default: {
        } break;
    }
    printf("\n");
}

// Consume one character from the scanner and return it, keeping the
// line/column counters and the absolute offset in sync: a newline bumps the
// line number and resets the column to 1, anything else bumps the column.
char
scan_next(Scanner *scanner) {
    char c = sv_next(&scanner->current);
    if (c == '\n') {
        scanner->line_number++;
        scanner->col_number = 1;
    } else {
        scanner->col_number++;
    }
    scanner->offset++;
    return c;
}

// Return the result of sv_peek on the remaining input; the scanner itself is
// not modified (it is taken as const).
char
scan_peek(const Scanner *scanner) {
    return sv_peek(&scanner->current);
}

// True while the remaining input view is non-empty.
bool
scan_has_next(const Scanner *scanner) {
    return scanner->current.n != 0;
}

// Consume characters until the next one is not whitespace (space, \f, \n,
// \r, \t, \v) or the input is exhausted.
void
skip_whitespace(Scanner *scanner) {
    while (scan_has_next(scanner)) {
        char c = scan_peek(scanner);
        switch (c) {
            case ' ':
            case '\f':
            case '\n':
            case '\r':
            case '\t':
            case '\v': {
                scan_next(scanner);
            } break;
            default: {
                return;
            } break;
        }
    }
}

// True if `c` terminates a symbol/number token: EOF, NUL, ';', '"', '\'',
// parentheses or any whitespace character.
bool
is_delimiter(char c) {
    switch (c) {
        case EOF:
        case '\0':
        case ';':
        case '"':
        case '\'':
        case '(':
        case ')':
        case ' ':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
        case '\v': {
            return true;
        } break;
        default: {
        } break;
    }
    return false;
}

// Compare the StringView VAL against the string literal KEYWORD (the
// literal's terminating NUL is excluded from the comparison length).
#define TOKEN_IS_KEYWORD(VAL, KEYWORD) \
    sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1})

// Classify the text of a bare (non-string, non-paren) token.
//
// Returns TOKEN_FIXNUM for an optional leading '-' followed by one or more
// decimal digits, the matching keyword token for nil/true/false/lambda/if/
// def/set!/fun, and TOKEN_SYMBOL for everything else. A lone "-" and the
// empty view are symbols, not numbers.
TokenType
find_primitive_type(const StringView value) {
    // Skip the sign only when something follows it, so "-" stays a symbol.
    size_t first_digit = (value.n > 1 && value.start[0] == '-') ? 1 : 0;
    // An empty view must not be reported as a number.
    bool is_fixnum = value.n > 0;
    for (size_t i = first_digit; i < value.n; i++) {
        char c = value.start[i];
        if (!(c >= '0' && c <= '9')) {
            is_fixnum = false;
            break;
        }
    }
    if (is_fixnum) {
        return TOKEN_FIXNUM;
    }
    if (TOKEN_IS_KEYWORD(value, "nil")) {
        return TOKEN_NIL;
    }
    if (TOKEN_IS_KEYWORD(value, "true")) {
        return TOKEN_TRUE;
    }
    if (TOKEN_IS_KEYWORD(value, "false")) {
        return TOKEN_FALSE;
    }
    if (TOKEN_IS_KEYWORD(value, "lambda")) {
        return TOKEN_LAMBDA;
    }
    if (TOKEN_IS_KEYWORD(value, "if")) {
        return TOKEN_IF;
    }
    if (TOKEN_IS_KEYWORD(value, "def")) {
        return TOKEN_DEF;
    }
    if (TOKEN_IS_KEYWORD(value, "set!")) {
        return TOKEN_SET;
    }
    if (TOKEN_IS_KEYWORD(value, "fun")) {
        return TOKEN_FUN;
    }
    return TOKEN_SYMBOL;
}

// Split the source text in `sv` into an array of tokens.
//
// Recognised input: ';' comments running to end of line, double-quoted
// strings (a backslash escapes the following character), parentheses (the
// pair "()" is emitted as a single TOKEN_NIL) and bare words, which are
// classified by find_primitive_type(). Token values are views into the
// buffer behind `sv` (string values exclude the quotes and keep escape
// sequences unprocessed), so that buffer has to stay valid while the tokens
// are in use.
//
// On success the returned array ends with a TOKEN_EOF token. If a string is
// never closed, ERR_UNMATCHED_STRING is pushed onto `errors` with the
// position of the opening quote and the tokens collected so far are
// returned without a trailing TOKEN_EOF.
Token *
tokenize(const StringView *sv, Errors *errors) {
    Token *tokens = NULL;
    array_init(tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };
    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        // Trailing whitespace may have consumed the rest of the input; do
        // not read past the end of the view.
        if (!scan_has_next(&scanner)) {
            break;
        }

        // Remember where this token starts before consuming anything.
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        char c = scan_next(&scanner);
        switch (c) {
            case ';': {
                // Line comment: discard up to and including the newline
                // (or a NUL), stopping at end of input.
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (c == '\n' || c == '\0') {
                        break;
                    }
                }
            } break;
            case '"': {
                bool found = false;
                // True when the previous character was an unescaped
                // backslash. Tracking this (rather than just the previous
                // character) lets "a\\" terminate correctly: the second
                // backslash is escaped, so the quote after it is not.
                bool escaped = false;
                size_t n = 0;
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (escaped) {
                        escaped = false;
                    } else if (c == '\\') {
                        escaped = true;
                    } else if (c == '"') {
                        found = true;
                        break;
                    }
                    n++;
                }
                if (!found) {
                    error_push(errors, (Error){
                        .type = ERR_TYPE_LEXER,
                        .value = ERR_UNMATCHED_STRING,
                        .line = line,
                        .col = col,
                    });
                    return tokens;
                }
                Token token = (Token){
                    .value = (StringView){
                        // Skip the opening quote.
                        .start = &sv->start[offset + 1],
                        .n = n,
                    },
                    .type = TOKEN_STRING,
                    .line = line,
                    .col = col,
                };
                array_push(tokens, token);
            } break;
            case '(': {
                // "()" is shorthand for nil.
                if (scan_has_next(&scanner) && scan_peek(&scanner) == ')') {
                    scan_next(&scanner);
                    Token token = (Token){
                        .type = TOKEN_NIL,
                        .line = line,
                        .col = col,
                    };
                    array_push(tokens, token);
                } else {
                    Token token = (Token){
                        .type = TOKEN_LPAREN,
                        .line = line,
                        .col = col,
                    };
                    array_push(tokens, token);
                }
            } break;
            case ')': {
                Token token = (Token){
                    .type = TOKEN_RPAREN,
                    .line = line,
                    .col = col,
                };
                array_push(tokens, token);
            } break;
            default: {
                // Bare word: extend until the next delimiter or the end of
                // the input.
                size_t n = 1;
                while (scan_has_next(&scanner)
                       && !is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                // A word starting with EOF/NUL produces no token.
                if (c == EOF || c == '\0') {
                    break;
                }
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset],
                        .n = n,
                    },
                    .type = TOKEN_SYMBOL,
                    .line = line,
                    .col = col,
                };
                token.type = find_primitive_type(token.value);
                array_push(tokens, token);
            } break;
        }
    }

    // Push EOF token.
    Token token = (Token){
        .type = TOKEN_EOF,
        .line = scanner.line_number,
        .col = 1,
    };
    array_push(tokens, token);

    return tokens;
}