aboutsummaryrefslogtreecommitdiffstats
path: root/src/bytecode/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bytecode/lexer.c')
-rwxr-xr-xsrc/bytecode/lexer.c294
1 files changed, 0 insertions, 294 deletions
diff --git a/src/bytecode/lexer.c b/src/bytecode/lexer.c
deleted file mode 100755
index a80c845..0000000
--- a/src/bytecode/lexer.c
+++ /dev/null
@@ -1,294 +0,0 @@
1#include "lexer.h"
2
/* Human-readable names for each TokenType, indexed by the enum value.
 * Designated initializers keep the mapping correct even if the enum
 * is reordered in lexer.h.  Consumed by print_token for debug output. */
static const char* token_str[] = {
    [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
    [TOKEN_LPAREN] = "TOKEN_LPAREN",
    [TOKEN_RPAREN] = "TOKEN_RPAREN",
    [TOKEN_FIXNUM] = "TOKEN_FIXNUM",
    [TOKEN_SYMBOL] = "TOKEN_SYMBOL",
    [TOKEN_STRING] = "TOKEN_STRING",
    [TOKEN_NIL] = "TOKEN_NIL",
    [TOKEN_QUOTE] = "TOKEN_QUOTE",
    [TOKEN_TRUE] = "TOKEN_TRUE",
    [TOKEN_FALSE] = "TOKEN_FALSE",
    [TOKEN_IF] = "TOKEN_IF",
    [TOKEN_ELSE] = "TOKEN_ELSE",
    [TOKEN_DEF] = "TOKEN_DEF",
    [TOKEN_SET] = "TOKEN_SET",
    [TOKEN_FUN] = "TOKEN_FUN",
    [TOKEN_LAMBDA] = "TOKEN_LAMBDA",
    [TOKEN_DISPLAY] = "TOKEN_DISPLAY",
    [TOKEN_PRINT] = "TOKEN_PRINT",
    [TOKEN_NEWLINE] = "TOKEN_NEWLINE",
    [TOKEN_ADD] = "TOKEN_ADD",
    [TOKEN_SUB] = "TOKEN_SUB",
    [TOKEN_MUL] = "TOKEN_MUL",
    [TOKEN_DIV] = "TOKEN_DIV",
    [TOKEN_MOD] = "TOKEN_MOD",
    [TOKEN_NOT] = "TOKEN_NOT",
    [TOKEN_AND] = "TOKEN_AND",
    [TOKEN_OR] = "TOKEN_OR",
    [TOKEN_EQUAL] = "TOKEN_EQUAL",
    [TOKEN_LESS] = "TOKEN_LESS",
    [TOKEN_GREATER] = "TOKEN_GREATER",
    [TOKEN_LESS_EQUAL] = "TOKEN_LESS_EQUAL",
    [TOKEN_GREATER_EQUAL] = "TOKEN_GREATER_EQUAL",
    [TOKEN_EOF] = "TOKEN_EOF",
};
38
39void
40print_token(Token tok) {
41 printf("LINE: %3ld COL: %3ld ", tok.line, tok.column);
42 printf("%s", token_str[tok.type]);
43 switch (tok.type) {
44 case TOKEN_FIXNUM: {
45 printf(" -> ");
46 sv_write(&tok.value);
47 } break;
48 case TOKEN_SYMBOL: {
49 printf(" -> ");
50 sv_write(&tok.value);
51 } break;
52 case TOKEN_STRING: {
53 printf(" -> ");
54 sv_write(&tok.value);
55 } break;
56 default: {
57 } break;
58 }
59 printf("\n");
60}
61
62char
63scan_next(Scanner *scanner) {
64 char c = sv_next(&scanner->current);
65 if (c == '\n') {
66 scanner->line_number++;
67 scanner->col_number = 1;
68 } else {
69 scanner->col_number++;
70 }
71 scanner->offset++;
72 return c;
73}
74
75char
76scan_peek(const Scanner *scanner) {
77 return sv_peek(&scanner->current);
78}
79
80bool
81scan_has_next(const Scanner *scanner) {
82 return scanner->current.n != 0;
83}
84
85void
86skip_whitespace(Scanner *scanner) {
87 while (scan_has_next(scanner)) {
88 char c = scan_peek(scanner);
89 switch (c) {
90 case ' ':
91 case '\f':
92 case '\n':
93 case '\r':
94 case '\t':
95 case '\v': {
96 scan_next(scanner);
97 } break;
98 default: {
99 return;
100 } break;
101 }
102 }
103}
104
/*
 * A delimiter terminates a symbol or number lexeme: end-of-input
 * markers (EOF, NUL), the comment/string/quote introducers, either
 * parenthesis, or any whitespace character.
 */
bool
is_delimiter(char c) {
    bool is_blank = (c == ' ' || c == '\f' || c == '\n'
                     || c == '\r' || c == '\t' || c == '\v');
    bool is_marker = (c == ';' || c == '"' || c == '\''
                      || c == '(' || c == ')');
    return is_blank || is_marker || c == EOF || c == '\0';
}
126
127#define TOKEN_IS_KEYWORD(VAL, KEYWORD) \
128 sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1})
129
130TokenType
131find_primitive_type(const StringView value) {
132 bool is_fixnum = true;
133 for (size_t i = 0; i < value.n; i++) {
134 char c = value.start[i];
135 if (i == 0 && c == '-' && value.n > 1) {
136 continue;
137 }
138 if (!(c >= '0' && c <= '9')) {
139 is_fixnum = false;
140 break;
141 }
142 }
143 if (is_fixnum) {
144 return TOKEN_FIXNUM;
145 }
146 if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; }
147 if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; }
148 if (TOKEN_IS_KEYWORD(value, "if")) { return TOKEN_IF; }
149 if (TOKEN_IS_KEYWORD(value, "else")) { return TOKEN_ELSE; }
150 if (TOKEN_IS_KEYWORD(value, "def")) { return TOKEN_DEF; }
151 if (TOKEN_IS_KEYWORD(value, "set!")) { return TOKEN_SET; }
152 if (TOKEN_IS_KEYWORD(value, "fun")) { return TOKEN_FUN; }
153 if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; }
154 if (TOKEN_IS_KEYWORD(value, "display")) { return TOKEN_DISPLAY; }
155 if (TOKEN_IS_KEYWORD(value, "print")) { return TOKEN_PRINT; }
156 if (TOKEN_IS_KEYWORD(value, "newline")) { return TOKEN_NEWLINE; }
157 if (TOKEN_IS_KEYWORD(value, "+")) { return TOKEN_ADD; }
158 if (TOKEN_IS_KEYWORD(value, "-")) { return TOKEN_SUB; }
159 if (TOKEN_IS_KEYWORD(value, "*")) { return TOKEN_MUL; }
160 if (TOKEN_IS_KEYWORD(value, "/")) { return TOKEN_DIV; }
161 if (TOKEN_IS_KEYWORD(value, "%")) { return TOKEN_MOD; }
162 if (TOKEN_IS_KEYWORD(value, "not")) { return TOKEN_NOT; }
163 if (TOKEN_IS_KEYWORD(value, "and")) { return TOKEN_AND; }
164 if (TOKEN_IS_KEYWORD(value, "or")) { return TOKEN_OR; }
165 if (TOKEN_IS_KEYWORD(value, "=")) { return TOKEN_EQUAL; }
166 if (TOKEN_IS_KEYWORD(value, "<")) { return TOKEN_LESS; }
167 if (TOKEN_IS_KEYWORD(value, ">")) { return TOKEN_GREATER; }
168 if (TOKEN_IS_KEYWORD(value, "<=")) { return TOKEN_LESS_EQUAL; }
169 if (TOKEN_IS_KEYWORD(value, ">=")) { return TOKEN_GREATER_EQUAL; }
170
171 return TOKEN_SYMBOL;
172}
173
/*
 * Scan the entire input view into a growable, heap-allocated array of
 * tokens (managed via array_init/array_push).  On success the array
 * ends with a TOKEN_EOF entry.  On a lexing error (unterminated
 * string) an ERR_UNMATCHED_STRING is reported via error_push and the
 * tokens collected so far are returned WITHOUT a trailing TOKEN_EOF.
 *
 * Token .value views alias the caller's buffer *sv — the caller must
 * keep that buffer alive for as long as the tokens are in use.
 */
Token *
tokenize(const StringView *sv) {
    Token *tokens = NULL;
    array_init(tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };

    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        /* Record where this token starts: line/col for diagnostics,
         * offset for slicing the lexeme out of the original buffer. */
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        char c = scan_next(&scanner);
        switch (c) {
            case ';': {
                /* Line comment: discard up to end of line (or NUL). */
                while ((c = scan_next(&scanner)) != '\n' && c != '\0') {}
            } break;
            case '"': {
                /* String literal: consume until an unescaped closing
                 * quote.  prev tracks the previous character so a
                 * backslash-escaped quote does not terminate the
                 * literal.  (NOTE(review): a literal ending in an
                 * escaped backslash, \\", is also treated as escaped
                 * — confirm this is intended.) */
                char prev = c;
                bool found = false;
                size_t n = 0;
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (c == '"' && prev != '\\') {
                        found = true;
                        break;
                    }
                    prev = c;
                    n++;
                }
                if (!found) {
                    /* Input ended inside the string literal: report
                     * at the opening quote's position and bail out. */
                    error_push((Error){
                        .type = ERR_TYPE_LEXER,
                        .value = ERR_UNMATCHED_STRING,
                        .line = line,
                        .col = col,
                    });
                    return tokens;
                }
                /* offset + 1 skips the opening quote; n excludes the
                 * closing quote.  Escape sequences are kept verbatim. */
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset + 1],
                        .n = n,
                    },
                    .type = TOKEN_STRING,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            case '\'': {
                Token token = (Token){
                    .type = TOKEN_QUOTE,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            case '(': {
                /* "()" is lexed as a single NIL token; otherwise a
                 * plain left parenthesis. */
                if (scan_peek(&scanner) == ')') {
                    scan_next(&scanner);
                    Token token = (Token){
                        .type = TOKEN_NIL,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens, token);
                } else {
                    Token token = (Token){
                        .type = TOKEN_LPAREN,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens, token);
                }
            } break;
            case ')': {
                Token token = (Token){
                    .type = TOKEN_RPAREN,
                    .line = line,
                    .column = col,
                };
                array_push(tokens, token);
            } break;
            default: {
                /* Atom (number, keyword, or symbol): extend until the
                 * next delimiter; c itself is already the first char. */
                size_t n = 1;
                while (!is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                /* skip_whitespace may have exhausted the input before
                 * scan_next above; discard the resulting pseudo-token. */
                if (c == EOF || c == '\0') {
                    break;
                }
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset],
                        .n = n,
                    },
                    .type = TOKEN_SYMBOL,
                    .line = line,
                    .column = col,
                };
                /* Reclassify: fixnum or keyword beats plain symbol. */
                token.type = find_primitive_type(token.value);
                array_push(tokens, token);
            } break;
        }
    }

    // Push EOF token.
    Token token = (Token){
        .type = TOKEN_EOF,
        .line = scanner.line_number,
        .column = 1,
    };
    array_push(tokens, token);

    return tokens;
}