diff options
Diffstat (limited to 'src/bytecode/lexer.c')
-rwxr-xr-x | src/bytecode/lexer.c | 294 |
1 file changed, 0 insertions, 294 deletions
diff --git a/src/bytecode/lexer.c b/src/bytecode/lexer.c deleted file mode 100755 index a80c845..0000000 --- a/src/bytecode/lexer.c +++ /dev/null | |||
@@ -1,294 +0,0 @@ | |||
1 | #include "lexer.h" | ||
2 | |||
// Human-readable name for each TokenType, indexed by enum value.
// Designated initializers keep the mapping correct even if the enum
// ordering in lexer.h changes. Used by print_token() for debug output.
static const char* token_str[] = {
    [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
    [TOKEN_LPAREN] = "TOKEN_LPAREN",
    [TOKEN_RPAREN] = "TOKEN_RPAREN",
    [TOKEN_FIXNUM] = "TOKEN_FIXNUM",
    [TOKEN_SYMBOL] = "TOKEN_SYMBOL",
    [TOKEN_STRING] = "TOKEN_STRING",
    [TOKEN_NIL] = "TOKEN_NIL",
    [TOKEN_QUOTE] = "TOKEN_QUOTE",
    [TOKEN_TRUE] = "TOKEN_TRUE",
    [TOKEN_FALSE] = "TOKEN_FALSE",
    [TOKEN_IF] = "TOKEN_IF",
    [TOKEN_ELSE] = "TOKEN_ELSE",
    [TOKEN_DEF] = "TOKEN_DEF",
    [TOKEN_SET] = "TOKEN_SET",
    [TOKEN_FUN] = "TOKEN_FUN",
    [TOKEN_LAMBDA] = "TOKEN_LAMBDA",
    [TOKEN_DISPLAY] = "TOKEN_DISPLAY",
    [TOKEN_PRINT] = "TOKEN_PRINT",
    [TOKEN_NEWLINE] = "TOKEN_NEWLINE",
    [TOKEN_ADD] = "TOKEN_ADD",
    [TOKEN_SUB] = "TOKEN_SUB",
    [TOKEN_MUL] = "TOKEN_MUL",
    [TOKEN_DIV] = "TOKEN_DIV",
    [TOKEN_MOD] = "TOKEN_MOD",
    [TOKEN_NOT] = "TOKEN_NOT",
    [TOKEN_AND] = "TOKEN_AND",
    [TOKEN_OR] = "TOKEN_OR",
    [TOKEN_EQUAL] = "TOKEN_EQUAL",
    [TOKEN_LESS] = "TOKEN_LESS",
    [TOKEN_GREATER] = "TOKEN_GREATER",
    [TOKEN_LESS_EQUAL] = "TOKEN_LESS_EQUAL",
    [TOKEN_GREATER_EQUAL] = "TOKEN_GREATER_EQUAL",
    [TOKEN_EOF] = "TOKEN_EOF",
};
38 | |||
39 | void | ||
40 | print_token(Token tok) { | ||
41 | printf("LINE: %3ld COL: %3ld ", tok.line, tok.column); | ||
42 | printf("%s", token_str[tok.type]); | ||
43 | switch (tok.type) { | ||
44 | case TOKEN_FIXNUM: { | ||
45 | printf(" -> "); | ||
46 | sv_write(&tok.value); | ||
47 | } break; | ||
48 | case TOKEN_SYMBOL: { | ||
49 | printf(" -> "); | ||
50 | sv_write(&tok.value); | ||
51 | } break; | ||
52 | case TOKEN_STRING: { | ||
53 | printf(" -> "); | ||
54 | sv_write(&tok.value); | ||
55 | } break; | ||
56 | default: { | ||
57 | } break; | ||
58 | } | ||
59 | printf("\n"); | ||
60 | } | ||
61 | |||
62 | char | ||
63 | scan_next(Scanner *scanner) { | ||
64 | char c = sv_next(&scanner->current); | ||
65 | if (c == '\n') { | ||
66 | scanner->line_number++; | ||
67 | scanner->col_number = 1; | ||
68 | } else { | ||
69 | scanner->col_number++; | ||
70 | } | ||
71 | scanner->offset++; | ||
72 | return c; | ||
73 | } | ||
74 | |||
/* Look at the next input character without consuming it. */
char
scan_peek(const Scanner *scanner) {
    return sv_peek(&scanner->current);
}
79 | |||
/* True while unconsumed input remains (current.n counts remaining bytes). */
bool
scan_has_next(const Scanner *scanner) {
    return scanner->current.n != 0;
}
84 | |||
85 | void | ||
86 | skip_whitespace(Scanner *scanner) { | ||
87 | while (scan_has_next(scanner)) { | ||
88 | char c = scan_peek(scanner); | ||
89 | switch (c) { | ||
90 | case ' ': | ||
91 | case '\f': | ||
92 | case '\n': | ||
93 | case '\r': | ||
94 | case '\t': | ||
95 | case '\v': { | ||
96 | scan_next(scanner); | ||
97 | } break; | ||
98 | default: { | ||
99 | return; | ||
100 | } break; | ||
101 | } | ||
102 | } | ||
103 | } | ||
104 | |||
/*
 * Report whether `c` terminates the current token: end-of-input
 * markers, a comment start, string/quote punctuation, parentheses,
 * or whitespace.
 *
 * NOTE(review): comparing a plain char against EOF (-1) only matches
 * when char is signed on this platform — preserved from the original;
 * confirm intended.
 */
bool
is_delimiter(char c) {
    if (c == EOF || c == '\0') {
        return true;
    }
    if (c == ';' || c == '"' || c == '\'' || c == '(' || c == ')') {
        return true;
    }
    return c == ' ' || c == '\f' || c == '\n' ||
           c == '\r' || c == '\t' || c == '\v';
}
126 | |||
127 | #define TOKEN_IS_KEYWORD(VAL, KEYWORD) \ | ||
128 | sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1}) | ||
129 | |||
130 | TokenType | ||
131 | find_primitive_type(const StringView value) { | ||
132 | bool is_fixnum = true; | ||
133 | for (size_t i = 0; i < value.n; i++) { | ||
134 | char c = value.start[i]; | ||
135 | if (i == 0 && c == '-' && value.n > 1) { | ||
136 | continue; | ||
137 | } | ||
138 | if (!(c >= '0' && c <= '9')) { | ||
139 | is_fixnum = false; | ||
140 | break; | ||
141 | } | ||
142 | } | ||
143 | if (is_fixnum) { | ||
144 | return TOKEN_FIXNUM; | ||
145 | } | ||
146 | if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } | ||
147 | if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } | ||
148 | if (TOKEN_IS_KEYWORD(value, "if")) { return TOKEN_IF; } | ||
149 | if (TOKEN_IS_KEYWORD(value, "else")) { return TOKEN_ELSE; } | ||
150 | if (TOKEN_IS_KEYWORD(value, "def")) { return TOKEN_DEF; } | ||
151 | if (TOKEN_IS_KEYWORD(value, "set!")) { return TOKEN_SET; } | ||
152 | if (TOKEN_IS_KEYWORD(value, "fun")) { return TOKEN_FUN; } | ||
153 | if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; } | ||
154 | if (TOKEN_IS_KEYWORD(value, "display")) { return TOKEN_DISPLAY; } | ||
155 | if (TOKEN_IS_KEYWORD(value, "print")) { return TOKEN_PRINT; } | ||
156 | if (TOKEN_IS_KEYWORD(value, "newline")) { return TOKEN_NEWLINE; } | ||
157 | if (TOKEN_IS_KEYWORD(value, "+")) { return TOKEN_ADD; } | ||
158 | if (TOKEN_IS_KEYWORD(value, "-")) { return TOKEN_SUB; } | ||
159 | if (TOKEN_IS_KEYWORD(value, "*")) { return TOKEN_MUL; } | ||
160 | if (TOKEN_IS_KEYWORD(value, "/")) { return TOKEN_DIV; } | ||
161 | if (TOKEN_IS_KEYWORD(value, "%")) { return TOKEN_MOD; } | ||
162 | if (TOKEN_IS_KEYWORD(value, "not")) { return TOKEN_NOT; } | ||
163 | if (TOKEN_IS_KEYWORD(value, "and")) { return TOKEN_AND; } | ||
164 | if (TOKEN_IS_KEYWORD(value, "or")) { return TOKEN_OR; } | ||
165 | if (TOKEN_IS_KEYWORD(value, "=")) { return TOKEN_EQUAL; } | ||
166 | if (TOKEN_IS_KEYWORD(value, "<")) { return TOKEN_LESS; } | ||
167 | if (TOKEN_IS_KEYWORD(value, ">")) { return TOKEN_GREATER; } | ||
168 | if (TOKEN_IS_KEYWORD(value, "<=")) { return TOKEN_LESS_EQUAL; } | ||
169 | if (TOKEN_IS_KEYWORD(value, ">=")) { return TOKEN_GREATER_EQUAL; } | ||
170 | |||
171 | return TOKEN_SYMBOL; | ||
172 | } | ||
173 | |||
/*
 * Scan the entire source text into a dynamic array of tokens,
 * terminated by a TOKEN_EOF entry. The caller owns the returned array.
 *
 * Token values (fixnums, symbols, strings) are StringViews pointing
 * into the caller's buffer `sv` — no text is copied, so `sv` must
 * outlive the returned tokens.
 *
 * On an unterminated string literal, pushes ERR_UNMATCHED_STRING and
 * returns the tokens scanned so far (note: without a TOKEN_EOF
 * terminator in that case).
 */
Token *
tokenize(const StringView *sv) {
    Token *tokens = NULL;
    array_init(tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };

    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        // Record where this token starts: location for diagnostics,
        // offset for slicing the lexeme out of the source buffer.
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        char c = scan_next(&scanner);
        switch (c) {
            case ';': {
                // Line comment: discard up to newline (or NUL at end
                // of input — presumably sv_next yields '\0' when
                // exhausted; TODO confirm against sv_next).
                while ((c = scan_next(&scanner)) != '\n' && c != '\0') {}
            } break;
            case '"': {
                // String literal: scan to the closing quote, treating
                // a quote preceded by a backslash as escaped.
                char prev = c;
                bool found = false;
                size_t n = 0;
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (c == '"' && prev != '\\') {
                        found = true;
                        break;
                    }
                    prev = c;
                    n++;
                }
                if (!found) {
                    // Input ended inside the string: report and bail
                    // with whatever was tokenized so far.
                    error_push((Error){
                        .type = ERR_TYPE_LEXER,
                        .value = ERR_UNMATCHED_STRING,
                        .line = line,
                        .col = col,
                    });
                    return tokens;
                }
                // offset + 1 skips the opening quote; n excludes the
                // closing quote, so the view is the bare string body.
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset + 1],
                        .n = n,
                    },
                    .type = TOKEN_STRING,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            case '\'': {
                Token token = (Token){
                    .type = TOKEN_QUOTE,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            case '(': {
                if (scan_peek(&scanner) == ')') {
                    // "()" is lexed as a single nil token.
                    scan_next(&scanner);
                    Token token = (Token){
                        .type = TOKEN_NIL,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens, token);
                } else {
                    Token token = (Token){
                        .type = TOKEN_LPAREN,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens, token);
                }
            } break;
            case ')': {
                Token token = (Token){
                    .type = TOKEN_RPAREN,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            default: {
                // Atom: consume characters until the next delimiter.
                size_t n = 1;
                while (!is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                // The character consumed above may have been the
                // end-of-input sentinel (e.g. after trailing
                // whitespace); emit no token for it.
                if (c == EOF || c == '\0') {
                    break;
                }
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset],
                        .n = n,
                    },
                    .type = TOKEN_SYMBOL,
                    .line = line,
                    .column = col,
                };
                // Refine: digits/keywords get their specific type,
                // everything else stays TOKEN_SYMBOL.
                token.type = find_primitive_type(token.value);
                array_push(tokens, token);
            } break;
        }
    }

    // Push EOF token.
    Token token = (Token){
        .type = TOKEN_EOF,
        .line = scanner.line_number,
        .column = 1,
    };
    array_push(tokens, token);

    return tokens;
}