diff options
author | Bad Diode <bd@badd10de.dev> | 2021-10-08 11:37:03 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-10-08 11:37:03 +0200 |
commit | baaef414186e60dbb127662d5f4ffab10ebf225e (patch) | |
tree | 0faa41605fa9e6348c1959e8b88b01a5f85ee4da /src/bootstrap | |
parent | 96d27c2a3e1a0fa0878beb3f9cd02f4b4ed8fdbb (diff) | |
download | bdl-baaef414186e60dbb127662d5f4ffab10ebf225e.tar.gz bdl-baaef414186e60dbb127662d5f4ffab10ebf225e.zip |
Add initial tokenizer
Diffstat (limited to 'src/bootstrap')
-rwxr-xr-x | src/bootstrap/main.c | 110 |
1 files changed, 109 insertions, 1 deletions
diff --git a/src/bootstrap/main.c b/src/bootstrap/main.c index 861c206..98f313b 100755 --- a/src/bootstrap/main.c +++ b/src/bootstrap/main.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <stdio.h> | 1 | #include <stdio.h> |
2 | #include <ctype.h> | ||
2 | 3 | ||
3 | #include "shorthand.h" | 4 | #include "shorthand.h" |
4 | 5 | ||
@@ -40,6 +41,111 @@ read_line(void) { | |||
40 | return (StringView){.start = (char *)&readline_buf, .n = n}; | 41 | return (StringView){.start = (char *)&readline_buf, .n = n}; |
41 | } | 42 | } |
42 | 43 | ||
44 | typedef struct Tokens { | ||
45 | StringView *start; | ||
46 | size_t n; | ||
47 | } Tokens; | ||
48 | |||
49 | Tokens | ||
50 | tokenize(StringView sv) { | ||
51 | // NOTE: Not allocating any memory for now, but we are limited by a maximum | ||
52 | // number of tokens we can process. | ||
53 | #define TOKENS_BUF_SIZE 1024 | ||
54 | static StringView tokens_buf[TOKENS_BUF_SIZE]; | ||
55 | |||
56 | // Clear buffer. | ||
57 | for (size_t i = 0; i < TOKENS_BUF_SIZE; i++) { | ||
58 | tokens_buf[i] = (StringView){0}; | ||
59 | } | ||
60 | |||
61 | size_t n = 0; | ||
62 | size_t token_n = 0; | ||
63 | for (size_t i = 0; i < sv.n; i++) { | ||
64 | switch (sv.start[i]) { | ||
65 | case ' ': | ||
66 | case '\f': | ||
67 | case '\n': | ||
68 | case '\r': | ||
69 | case '\t': | ||
70 | case '\v': { | ||
71 | if (token_n != 0) { | ||
72 | // Push token. | ||
73 | tokens_buf[n++] = (StringView){ | ||
74 | .start = &sv.start[i - token_n], | ||
75 | .n = token_n, | ||
76 | }; | ||
77 | token_n = 0; | ||
78 | } | ||
79 | continue; | ||
80 | } break; | ||
81 | case '(': { | ||
82 | if ((i + 1 < sv.n)) { | ||
83 | char next_c = sv.start[i + 1]; | ||
84 | if (isspace(next_c)) { | ||
85 | fprintf(stderr, "error: lparen delimiter followed by space\n"); | ||
86 | return (Tokens){0}; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | if (token_n != 0) { | ||
91 | fprintf(stderr, "error: lparen delimiter within symbol name\n"); | ||
92 | return (Tokens){0}; | ||
93 | } | ||
94 | // Push paren token. | ||
95 | tokens_buf[n++] = (StringView){ | ||
96 | .start = &sv.start[i], | ||
97 | .n = 1, | ||
98 | }; | ||
99 | } break; | ||
100 | case ')': { | ||
101 | if ((i + 1 < sv.n)) { | ||
102 | char next_c = sv.start[i + 1]; | ||
103 | if ((next_c != ')' && !isspace(next_c))) { | ||
104 | fprintf(stderr, "error: rparen delimiter within symbol name\n"); | ||
105 | return (Tokens){0}; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | if (token_n != 0) { | ||
110 | // Push previous token. | ||
111 | tokens_buf[n++] = (StringView){ | ||
112 | .start = &sv.start[i - token_n], | ||
113 | .n = token_n, | ||
114 | }; | ||
115 | token_n = 0; | ||
116 | } | ||
117 | |||
118 | // Push paren token. | ||
119 | tokens_buf[n++] = (StringView){ | ||
120 | .start = &sv.start[i], | ||
121 | .n = 1, | ||
122 | }; | ||
123 | } break; | ||
124 | default: { | ||
125 | token_n++; | ||
126 | } break; | ||
127 | } | ||
128 | } | ||
129 | if (token_n != 0) { | ||
130 | // End of line encountered. | ||
131 | tokens_buf[n++] = (StringView){ | ||
132 | .start = &sv.start[sv.n - token_n], | ||
133 | .n = token_n, | ||
134 | }; | ||
135 | } | ||
136 | |||
137 | // DEBUG: Printing tokens. | ||
138 | printf("N_TOKENS: %ld\n", n); | ||
139 | for (size_t i = 0; i < n; i++) { | ||
140 | printf("TOKEN: "); | ||
141 | sv_write(tokens_buf[i]); | ||
142 | printf("\tN: %ld", tokens_buf[i].n); | ||
143 | printf("\n"); | ||
144 | } | ||
145 | |||
146 | return (Tokens){.start = (StringView *)&tokens_buf, .n = n}; | ||
147 | } | ||
148 | |||
43 | void | 149 | void |
44 | display(StringView sv) { | 150 | display(StringView sv) { |
45 | if (sv.n != 0) { | 151 | if (sv.n != 0) { |
@@ -55,7 +161,9 @@ main(void) { | |||
55 | printf("BDL REPL (Press Ctrl-C to exit)\n"); | 161 | printf("BDL REPL (Press Ctrl-C to exit)\n"); |
56 | while (true) { | 162 | while (true) { |
57 | printf(REPL_PROMPT); | 163 | printf(REPL_PROMPT); |
58 | display(read_line()); | 164 | StringView line = read_line(); |
165 | tokenize(line); | ||
166 | display(line); | ||
59 | } | 167 | } |
60 | return 0; | 168 | return 0; |
61 | } | 169 | } |