From baaef414186e60dbb127662d5f4ffab10ebf225e Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Fri, 8 Oct 2021 11:37:03 +0200
Subject: Add initial tokenizer

---
Review notes (v2):
 - tokenize(): every push into tokens_buf is now bounds-checked.
 - tokenize(): size_t values are printed with %zu instead of %ld.
 - tokenize(): isspace() is given an unsigned char value.
 - This copy was rebuilt from a whitespace-collapsed export: line breaks and
   indentation are reconstructed, and the two system header names in the
   #include hunk (<stdio.h>, <ctype.h>) are assumed -- TODO confirm.

 Makefile             |   4 +-
 src/bootstrap/main.c | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 8bd5560..0564240 100755
--- a/Makefile
+++ b/Makefile
@@ -34,9 +34,9 @@ else
 CFLAGS += $(RELEASE_CFLAGS)
 endif
 
-main: tools $(BUILD_DIR) $(ROM) $(BIN)
+main: $(BIN)
 
-$(BIN): $(SRC_MAIN) $(WATCH_SRC)
+$(BIN): $(SRC_MAIN) $(WATCH_SRC) $(BUILD_DIR)
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $(BIN) $(SRC_MAIN) $(LDLIBS)
 
 # Create build directory if needed.
diff --git a/src/bootstrap/main.c b/src/bootstrap/main.c
index 861c206..98f313b 100755
--- a/src/bootstrap/main.c
+++ b/src/bootstrap/main.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <ctype.h>
 
 #include "shorthand.h"
 
@@ -40,6 +41,139 @@ read_line(void) {
     return (StringView){.start = (char *)&readline_buf, .n = n};
 }
 
+typedef struct Tokens {
+    StringView *start;
+    size_t n;
+} Tokens;
+
+// Append `token` to `buf` (capacity `cap`), advancing the count `*n`. Returns
+// false and reports an error instead of writing past the end of the buffer.
+static bool
+push_token(StringView *buf, size_t cap, size_t *n, StringView token) {
+    if (*n >= cap) {
+        fprintf(stderr, "error: too many tokens (limit: %zu)\n", cap);
+        return false;
+    }
+    buf[(*n)++] = token;
+    return true;
+}
+
+// Split `sv` into tokens: runs of non-whitespace characters (symbols) and
+// single-character tokens for each parenthesis.
+//
+// The tokens are views into `sv` stored in a static buffer that is reused by
+// every call. On error a message is printed to stderr and an empty Tokens
+// value is returned.
+Tokens
+tokenize(StringView sv) {
+    // NOTE: Not allocating any memory for now, but we are limited by a maximum
+    // number of tokens we can process.
+    #define TOKENS_BUF_SIZE 1024
+    static StringView tokens_buf[TOKENS_BUF_SIZE];
+
+    // Clear buffer.
+    for (size_t i = 0; i < TOKENS_BUF_SIZE; i++) {
+        tokens_buf[i] = (StringView){0};
+    }
+
+    size_t n = 0;
+    size_t token_n = 0;
+    for (size_t i = 0; i < sv.n; i++) {
+        switch (sv.start[i]) {
+            case ' ':
+            case '\f':
+            case '\n':
+            case '\r':
+            case '\t':
+            case '\v': {
+                if (token_n != 0) {
+                    // Whitespace terminates the pending symbol.
+                    StringView token = {
+                        .start = &sv.start[i - token_n],
+                        .n = token_n,
+                    };
+                    if (!push_token(tokens_buf, TOKENS_BUF_SIZE, &n, token)) {
+                        return (Tokens){0};
+                    }
+                    token_n = 0;
+                }
+            } break;
+            case '(': {
+                if (i + 1 < sv.n) {
+                    // <ctype.h> functions need an unsigned char value.
+                    unsigned char next_c = (unsigned char)sv.start[i + 1];
+                    if (isspace(next_c)) {
+                        fprintf(stderr, "error: lparen delimiter followed by space\n");
+                        return (Tokens){0};
+                    }
+                }
+
+                if (token_n != 0) {
+                    fprintf(stderr, "error: lparen delimiter within symbol name\n");
+                    return (Tokens){0};
+                }
+                // Push paren token.
+                StringView paren = {.start = &sv.start[i], .n = 1};
+                if (!push_token(tokens_buf, TOKENS_BUF_SIZE, &n, paren)) {
+                    return (Tokens){0};
+                }
+            } break;
+            case ')': {
+                if (i + 1 < sv.n) {
+                    unsigned char next_c = (unsigned char)sv.start[i + 1];
+                    if (next_c != ')' && !isspace(next_c)) {
+                        fprintf(stderr, "error: rparen delimiter within symbol name\n");
+                        return (Tokens){0};
+                    }
+                }
+
+                if (token_n != 0) {
+                    // The paren also terminates the pending symbol.
+                    StringView token = {
+                        .start = &sv.start[i - token_n],
+                        .n = token_n,
+                    };
+                    if (!push_token(tokens_buf, TOKENS_BUF_SIZE, &n, token)) {
+                        return (Tokens){0};
+                    }
+                    token_n = 0;
+                }
+
+                // Push paren token.
+                StringView paren = {.start = &sv.start[i], .n = 1};
+                if (!push_token(tokens_buf, TOKENS_BUF_SIZE, &n, paren)) {
+                    return (Tokens){0};
+                }
+            } break;
+            default: {
+                token_n++;
+            } break;
+        }
+    }
+    if (token_n != 0) {
+        // End of input reached while still inside a symbol.
+        StringView token = {
+            .start = &sv.start[sv.n - token_n],
+            .n = token_n,
+        };
+        if (!push_token(tokens_buf, TOKENS_BUF_SIZE, &n, token)) {
+            return (Tokens){0};
+        }
+    }
+
+    // DEBUG: Printing tokens.
+    printf("N_TOKENS: %zu\n", n);
+    for (size_t i = 0; i < n; i++) {
+        printf("TOKEN: ");
+        sv_write(tokens_buf[i]);
+        // Cast so the argument matches %zu whatever type StringView.n has.
+        printf("\tN: %zu", (size_t)tokens_buf[i].n);
+        printf("\n");
+    }
+
+    return (Tokens){.start = tokens_buf, .n = n};
+}
+
 void
 display(StringView sv) {
     if (sv.n != 0) {
@@ -55,7 +189,9 @@ main(void) {
     printf("BDL REPL (Press Ctrl-C to exit)\n");
     while (true) {
         printf(REPL_PROMPT);
-        display(read_line());
+        StringView line = read_line();
+        tokenize(line);
+        display(line);
     }
     return 0;
 }
-- 
cgit v1.2.1