typedef enum TokenType { TOKEN_UNKNOWN = 0, TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_QUOTE, TOKEN_TRUE, TOKEN_FALSE, TOKEN_NIL, TOKEN_FIXNUM, TOKEN_SYMBOL, TOKEN_STRING, TOKEN_EOF, } TokenType; typedef struct Token { TokenType type; StringView value; size_t line; size_t column; } Token; typedef struct Tokens { Token *buf; size_t size; size_t cap; } Tokens; void print_token(Token tok) { printf("LINE: %3ld COL: %3ld ", tok.line, tok.column); switch (tok.type) { case TOKEN_LPAREN: { printf("TOKEN_LPAREN"); } break; case TOKEN_RPAREN: { printf("TOKEN_RPAREN"); } break; case TOKEN_QUOTE: { printf("TOKEN_QUOTE"); } break; case TOKEN_TRUE: { printf("TOKEN_TRUE"); } break; case TOKEN_FALSE: { printf("TOKEN_FALSE"); } break; case TOKEN_NIL: { printf("TOKEN_NIL"); } break; case TOKEN_FIXNUM: { printf("TOKEN_FIXNUM -> "); sv_write(&tok.value, stdout); } break; case TOKEN_SYMBOL: { printf("TOKEN_SYMBOL -> "); sv_write(&tok.value, stdout); } break; case TOKEN_STRING: { printf("TOKEN_STRING -> "); sv_write(&tok.value, stdout); } break; case TOKEN_EOF: { printf("TOKEN_EOF"); } break; case TOKEN_UNKNOWN: { printf("TOKEN_UNKNOWN"); } break; } printf("\n"); } #define TOK_BUF_CAP 256 void push_token(Tokens *tokens, Token tok) { if (tokens->buf == NULL) { tokens->size = 0; tokens->cap = TOK_BUF_CAP; tokens->buf = malloc(tokens->cap * sizeof(Token)); } else if (tokens->size == tokens->cap) { tokens->cap *= 2; tokens->buf = realloc(tokens->buf, tokens->cap * sizeof(Token)); } tokens->buf[tokens->size++] = tok; } typedef struct Scanner { StringView current; size_t line_number; size_t col_number; size_t offset; } Scanner; char scan_next(Scanner *scanner) { char c = sv_next(&scanner->current); if (c == '\n') { scanner->line_number++; scanner->col_number = 1; } else { scanner->col_number++; } scanner->offset++; return c; } char scan_peek(const Scanner *scanner) { return sv_peek(&scanner->current); } bool scan_has_next(const Scanner *scanner) { return scanner->current.n != 0; } void skip_whitespace(Scanner *scanner) { while (scan_has_next(scanner)) { char c = scan_peek(scanner); switch (c) { case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': { scan_next(scanner); } break; default: { return; } break; } } } bool is_delimiter(char c) { switch (c) { case EOF: case '\0': case ';': case '"': case '\'': case '(': case ')': case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': { return true; } break; } return false; } TokenType find_primitive_type(StringView value) { bool is_fixnum = true; for (size_t i = 0; i < value.n; i++) { char c = value.start[i]; if (i == 0 && c == '-' && value.n > 1) { continue; } if (!(c >= '0' && c <= '9')) { is_fixnum = false; break; } } if (is_fixnum) { return TOKEN_FIXNUM; } if (sv_equal(&value, &(StringView){"true", 4})) { return TOKEN_TRUE; } if (sv_equal(&value, &(StringView){"false", 5})) { return TOKEN_FALSE; } return TOKEN_SYMBOL; } Tokens tokenize(const StringView *sv) { Tokens tokens = (Tokens){0}; Scanner scanner = (Scanner){ .current = *sv, .line_number = 1, .col_number = 1, }; while (scan_has_next(&scanner)) { skip_whitespace(&scanner); size_t line = scanner.line_number; size_t col = scanner.col_number; size_t offset = scanner.offset; char c = scan_next(&scanner); switch (c) { case ';': { while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} } break; case '"': { char prev = c; bool found = false; size_t n = 0; while (scan_has_next(&scanner)) { c = scan_next(&scanner); if (c == '"' && prev != '\\') { found = true; break; } prev = c; n++; } if (!found) { error_push((Error){ .type = ERR_TYPE_LEXER, .value = ERR_UNMATCHED_STRING, .line = line, .col = col, }); return tokens; } Token token = (Token){ .value = (StringView){ .start = &sv->start[offset + 1], .n = n, }, .type = TOKEN_STRING, .line = line, .column = col, }; push_token(&tokens, token); } break; case '\'': { Token token = (Token){ .type = TOKEN_QUOTE, .line = line, .column = col, }; push_token(&tokens, token); } break; case '(': { if (scan_peek(&scanner) == ')') { scan_next(&scanner); Token token = (Token){ .type = TOKEN_NIL, .line = line, .column = col, }; push_token(&tokens, token); } else { Token token = (Token){ .type = TOKEN_LPAREN, .line = line, .column = col, }; push_token(&tokens, token); } } break; case ')': { Token token = (Token){ .type = TOKEN_RPAREN, .line = line, .column = col, }; push_token(&tokens, token); } break; default: { size_t n = 1; while (!is_delimiter(scan_peek(&scanner))) { scan_next(&scanner); n++; } if (c == EOF || c == '\0') { break; } Token token = (Token){ .value = (StringView){ .start = &sv->start[offset], .n = n, }, .type = TOKEN_SYMBOL, .line = line, .column = col, }; token.type = find_primitive_type(token.value); push_token(&tokens, token); } break; } } // Push EOF token. Token token = (Token){ .type = TOKEN_EOF, .line = scanner.line_number, .column = 1, }; push_token(&tokens, token); return tokens; }