author     Bad Diode <bd@badd10de.dev>  2024-06-15 16:52:36 +0200
committer  Bad Diode <bd@badd10de.dev>  2024-06-15 16:52:36 +0200
commit     e7cd0d47a603e4199b0ee7daa2434fc0db602bad (patch)
tree       511cfbe2cea66e45b4ca7669ed9a101763ae3537
parent     893b52223d274c675272cee55768a9d5853420fb (diff)
download   bdl-e7cd0d47a603e4199b0ee7daa2434fc0db602bad.tar.gz
           bdl-e7cd0d47a603e4199b0ee7daa2434fc0db602bad.zip
Move lexer code to lexer.c file
-rw-r--r--  Makefile    |   3
-rw-r--r--  src/lexer.c | 734
-rw-r--r--  src/lexer.h |  99
-rw-r--r--  src/main.c  | 631
4 files changed, 532 insertions, 935 deletions
diff --git a/Makefile b/Makefile
index 45c1389..4e66983 100644
--- a/Makefile
+++ b/Makefile
@@ -43,9 +43,6 @@ $(BIN): $(SRC_MAIN) $(WATCH_SRC) $(BUILD_DIR)
 $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
 
-tests: $(BIN)
-	./$(BIN) tests/constants/numbers.bdl
-
 run: $(BIN)
 	$(BIN) tests/literals.bad
 
diff --git a/src/lexer.c b/src/lexer.c
index a6d7c74..df998f2 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,130 +1,192 @@
1#include "lexer.h" 1#define LEXER_MEM GB(2)
2#include "errors.h" 2
3 3typedef enum TokenType {
4static const char* token_str[] = { 4 TOK_UNKNOWN = 0,
5 [TOKEN_UNKNOWN] = "UNKNOWN", 5
6 [TOKEN_LPAREN] = "LPAREN", 6 // Parentheses.
7 [TOKEN_RPAREN] = "RPAREN", 7 TOK_LPAREN, // (
8 [TOKEN_LSQUARE] = "LSQUARE", 8 TOK_RPAREN, // )
9 [TOKEN_RSQUARE] = "RSQUARE", 9 TOK_LSQUARE, // [
10 [TOKEN_LCURLY] = "LCURLY", 10 TOK_RSQUARE, // ]
11 [TOKEN_RCURLY] = "RCURLY", 11 TOK_LCURLY, // {
12 [TOKEN_NUMBER] = "NUMBER", 12 TOK_RCURLY, // }
13 [TOKEN_SYMBOL] = "SYMBOL", 13
14 [TOKEN_STRING] = "STRING", 14 // Basic literals.
15 [TOKEN_NIL] = "NIL", 15 TOK_NUMBER,
16 [TOKEN_TRUE] = "TRUE", 16 TOK_SYMBOL,
17 [TOKEN_FALSE] = "FALSE", 17 TOK_STRING,
18 [TOKEN_LAMBDA] = "LAMBDA", 18
19 [TOKEN_IF] = "IF", 19 // Keywords.
20 [TOKEN_DEF] = "DEF", 20 TOK_BREAK, // break
21 [TOKEN_SET] = "SET", 21 TOK_CASE, // case
22 [TOKEN_FUN] = "FUN", 22 TOK_CONTINUE, // continue
23 [TOKEN_STRUCT] = "STRUCT", 23 TOK_FALSE, // false
24 [TOKEN_ADD] = "ADD", 24 TOK_FUN, // fun
25 [TOKEN_SUB] = "SUB", 25 TOK_IF, // if
26 [TOKEN_MUL] = "MUL", 26 TOK_LET, // let
27 [TOKEN_DIV] = "DIV", 27 TOK_MATCH, // match
28 [TOKEN_MOD] = "MOD", 28 TOK_NIL, // nil
29 [TOKEN_NOT] = "NOT", 29 TOK_RETURN, // return
30 [TOKEN_AND] = "AND", 30 TOK_SET, // set
31 [TOKEN_OR] = "OR", 31 TOK_STRUCT, // struct
32 [TOKEN_EQ] = "EQ", 32 TOK_TRUE, // true
33 [TOKEN_LT] = "LT", 33 TOK_WHILE, // while
34 [TOKEN_GT] = "GT", 34
35 [TOKEN_LE] = "LE", 35 // Arithmetic ops.
36 [TOKEN_GE] = "GE", 36 TOK_ADD, // +
37 [TOKEN_COLON] = "COLON", 37 TOK_SUB, // -
38 [TOKEN_DOT] = "DOT", 38 TOK_MUL, // *
39 [TOKEN_AT] = "AT", 39 TOK_DIV, // /
40 [TOKEN_EOF] = "EOF", 40 TOK_MOD, // %
41}; 41
42 // Logical ops.
43 TOK_NOT, // !
44 TOK_AND, // &&
45 TOK_OR, // ||
46 TOK_EQ, // ==
47 TOK_NOTEQ, // !=
48 TOK_LT, // <
49 TOK_GT, // >
50 TOK_LE, // <=
51 TOK_GE, // >=
52
53 // Bitwise ops.
54 TOK_BITNOT, // ~
55 TOK_BITAND, // &
56 TOK_BITOR, // |
57 TOK_BITLSHIFT, // <<
58 TOK_BITRSHIFT, // >>
59
60 // Special ops.
61 TOK_COLON, // :
62 TOK_DOT, // .
63 TOK_AT, // @
64 TOK_ASSIGN, // =
65
66 // End of file.
67 TOK_EOF,
68} TokenType;
69
70Str token_str[] = {
71 [TOK_UNKNOWN] = cstr("UNKNOWN"),
72
73 // Parentheses.
74 [TOK_LPAREN] = cstr("LPAREN"),
75 [TOK_RPAREN] = cstr("RPAREN"),
76 [TOK_LSQUARE] = cstr("LSQUARE"),
77 [TOK_RSQUARE] = cstr("RSQUARE"),
78 [TOK_LCURLY] = cstr("LCURLY"),
79 [TOK_RCURLY] = cstr("RCURLY"),
80
81 // Basic literals.
82 [TOK_NUMBER] = cstr("NUMBER"),
83 [TOK_SYMBOL] = cstr("SYMBOL"),
84 [TOK_STRING] = cstr("STRING"),
85
86 // Keywords.
87 [TOK_BREAK] = cstr("BREAK"),
88 [TOK_CASE] = cstr("CASE"),
89 [TOK_CONTINUE] = cstr("CONTINUE"),
90 [TOK_FALSE] = cstr("FALSE"),
91 [TOK_FUN] = cstr("FUN"),
92 [TOK_IF] = cstr("IF"),
93 [TOK_LET] = cstr("LET"),
94 [TOK_MATCH] = cstr("MATCH"),
95 [TOK_NIL] = cstr("NIL"),
96 [TOK_RETURN] = cstr("RETURN"),
97 [TOK_SET] = cstr("SET"),
98 [TOK_STRUCT] = cstr("STRUCT"),
99 [TOK_TRUE] = cstr("TRUE"),
100 [TOK_WHILE] = cstr("WHILE"),
101
102 // Arithmetic ops.
103 [TOK_ADD] = cstr("ADD"),
104 [TOK_SUB] = cstr("SUB"),
105 [TOK_MUL] = cstr("MUL"),
106 [TOK_DIV] = cstr("DIV"),
107 [TOK_MOD] = cstr("MOD"),
42 108
43typedef struct Keyword { 109 // Logical ops.
44 char *str; 110 [TOK_NOT] = cstr("NOT"),
45 size_t n; 111 [TOK_AND] = cstr("AND"),
46 TokenType token; 112 [TOK_OR] = cstr("OR"),
47} Keyword; 113 [TOK_EQ] = cstr("EQ"),
48 114 [TOK_NOTEQ] = cstr("NOTEQ"),
49#define KEYWORD(STR,TOK) {(STR), sizeof(STR) - 1, (TOK)} 115 [TOK_LT] = cstr("LT"),
50 116 [TOK_GT] = cstr("GT"),
51static const Keyword keywords[] = { 117 [TOK_LE] = cstr("LE"),
52 KEYWORD("nil", TOKEN_NIL), 118 [TOK_GE] = cstr("GE"),
53 KEYWORD("true", TOKEN_TRUE), 119
54 KEYWORD("false", TOKEN_FALSE), 120 // Bitwise ops.
55 KEYWORD("lambda", TOKEN_LAMBDA), 121 [TOK_BITNOT] = cstr("BITNOT"),
56 KEYWORD("if", TOKEN_IF), 122 [TOK_BITAND] = cstr("BITAND"),
57 KEYWORD("def", TOKEN_DEF), 123 [TOK_BITOR] = cstr("BITOR"),
58 KEYWORD("set", TOKEN_SET), 124 [TOK_BITLSHIFT] = cstr("BITLSHIFT"),
59 KEYWORD("fun", TOKEN_FUN), 125 [TOK_BITRSHIFT] = cstr("BITRSHIFT"),
60 KEYWORD("struct", TOKEN_STRUCT), 126
61 KEYWORD("+", TOKEN_ADD), 127 // Special ops.
62 KEYWORD("-", TOKEN_SUB), 128 [TOK_COLON] = cstr("COLON"),
63 KEYWORD("*", TOKEN_MUL), 129 [TOK_DOT] = cstr("DOT"),
64 KEYWORD("/", TOKEN_DIV), 130 [TOK_AT] = cstr("AT"),
65 KEYWORD("%", TOKEN_MOD), 131 [TOK_ASSIGN] = cstr("ASSIGN"),
66 KEYWORD("not", TOKEN_NOT), 132
67 KEYWORD("and", TOKEN_AND), 133 // End of file.
68 KEYWORD("or", TOKEN_OR), 134 [TOK_EOF] = cstr("EOF"),
69 KEYWORD("=", TOKEN_EQ),
70 KEYWORD("<", TOKEN_LT),
71 KEYWORD(">", TOKEN_GT),
72 KEYWORD("<=", TOKEN_LE),
73 KEYWORD(">=", TOKEN_GE),
74}; 135};
75 136
76void 137typedef struct Token {
77print_token(Token tok) { 138 TokenType type;
78 printf("[%4ld:%-4ld] ", tok.line, tok.col); 139 Str val;
79 printf("%s", token_str[tok.type]); 140 sz line;
80 switch (tok.type) { 141 sz col;
81 case TOKEN_NUMBER: 142} Token;
82 case TOKEN_SYMBOL: 143
83 case TOKEN_STRING: { 144typedef struct Scanner {
84 printf(" -> "); 145 Str str;
85 sv_write(&tok.value); 146 sz line;
86 } break; 147 sz col;
87 default: { 148} Scanner;
88 } break;
89 }
90 printf("\n");
91}
92 149
93char 150char
94scan_next(Scanner *scanner) { 151scan_next(Scanner *scanner) {
95 char c = sv_next(&scanner->current); 152 char c = str_next(&scanner->str);
96 if (c == '\n') { 153 if (c == '\n') {
97 scanner->line_number++; 154 scanner->line++;
98 scanner->col_number = 1; 155 scanner->col = 0;
99 } else { 156 } else {
100 scanner->col_number++; 157 scanner->col++;
101 } 158 }
102 scanner->offset++;
103 return c; 159 return c;
104} 160}
105 161
106void 162bool
107scan_rewind(Scanner *scanner) { 163scan_has_next(Scanner *scanner) {
108 sv_rewind(&scanner->current); 164 return scanner->str.size;
109 scanner->offset--;
110} 165}
111 166
112char 167char
113scan_peek(const Scanner *scanner) { 168scan_peek(Scanner *scanner) {
114 return sv_peek(&scanner->current); 169 return str_peek(scanner->str);
115} 170}
116 171
117bool 172void
118scan_has_next(const Scanner *scanner) { 173scan_skip_line(Scanner *scanner) {
119 return scanner->current.n != 0; 174 SearchResult newline = array_find_next(scanner->str, cstr("\n"));
175 if (newline.found) {
176 scanner->str.mem += newline.pos + 1;
177 scanner->str.size -= newline.pos + 1;
178 scanner->line++;
179 scanner->col = 0;
180 }
120} 181}
121 182
122void 183void
123skip_whitespace(Scanner *scanner) { 184scan_skip_whitespace(Scanner *scanner) {
124 while (scan_has_next(scanner)) { 185 while (scan_has_next(scanner)) {
125 char c = scan_peek(scanner); 186 char c = scan_peek(scanner);
126 switch (c) { 187 switch (c) {
127 case ' ': 188 case ' ':
189 case ',': // Commas are just syntactic sugar.
128 case '\f': 190 case '\f':
129 case '\n': 191 case '\n':
130 case '\r': 192 case '\r':
@@ -132,6 +194,10 @@ skip_whitespace(Scanner *scanner) {
             case '\v': {
                 scan_next(scanner);
             } break;
+            case ';': {
+                // Found a comment! (skip)
+                scan_skip_line(scanner);
+            } break;
             default: {
                 return;
             } break;
@@ -140,22 +206,33 @@ skip_whitespace(Scanner *scanner) {
 }
 
 bool
-is_delimiter(char c) {
+scan_is_valid_split(char c) {
     switch (c) {
-        case EOF:
-        case '\0':
         case ';':
-        case '"':
-        case '\'':
         case '(':
         case ')':
         case '[':
         case ']':
         case '{':
         case '}':
+        case '+':
+        case '-':
+        case '*':
+        case '/':
+        case '%':
+        case '!':
+        case '=':
+        case '<':
+        case '>':
+        case '~':
+        case '&':
+        case '|':
         case ':':
+        case '.':
         case '@':
+        case '"':
         case ' ':
+        case ',':
         case '\f':
         case '\n':
         case '\r':
@@ -167,122 +244,351 @@ is_delimiter(char c) {
167 return false; 244 return false;
168} 245}
169 246
170TokenType 247void
171find_token_type(const StringView value) { 248scan_skip_until_valid(Scanner *scanner) {
172 for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { 249 while (scan_has_next(scanner)) {
173 StringView keyword = (StringView){keywords[i].str, keywords[i].n}; 250 char c = scan_peek(scanner);
174 if (sv_equal(&value, &keyword)) { 251 if (scan_is_valid_split(c)) {
175 return keywords[i].token; 252 return;
176 } 253 }
254 scan_next(scanner);
177 } 255 }
178 return TOKEN_SYMBOL;
179} 256}
180 257
181void 258Token
182print_tokens(Token *tokens) { 259emit_token(Scanner current, Scanner *scanner, TokenType t) {
183 for (size_t i = 0; i < array_size(tokens); i++) { 260 Str val = current.str;
184 print_token(tokens[i]); 261 val.size = current.str.size - scanner->str.size;
185 } 262 val.size = val.size < 0 ? 0 : val.size;
263 return (Token){
264 .val = val,
265 .line = current.line + 1,
266 .col = current.col + 1,
267 .type = t,
268 };
186} 269}
187 270
188Token * 271Token
189tokenize(const StringView *sv) { 272emit_token_err(Scanner *scanner, Str err_msg) {
190 Token *tokens = NULL; 273 return (Token){
191 array_init(tokens, 1); 274 .line = scanner->line + 1,
192 Scanner scanner = (Scanner){ 275 .col = scanner->col + 1,
193 .current = *sv, 276 .val = err_msg,
194 .line_number = 1, 277 .type = TOK_UNKNOWN,
195 .col_number = 1,
196 }; 278 };
279}
197 280
198 while (scan_has_next(&scanner)) { 281Token
199 skip_whitespace(&scanner); 282emit_token_number(Scanner *scanner) {
200 size_t line = scanner.line_number; 283 Scanner current = *scanner;
201 size_t col = scanner.col_number; 284 char c = scan_peek(scanner);
202 size_t offset = scanner.offset; 285 if (c == '+' || c == '-') {
203 Token token = (Token){ 286 scan_next(scanner);
204 .type = TOKEN_UNKNOWN, 287 if (str_has_prefix(scanner->str, cstr("0b")) ||
205 .line = line, 288 str_has_prefix(scanner->str, cstr("0x"))) {
206 .col = col, 289 scan_skip_until_valid(scanner);
207 }; 290 return emit_token_err(
208 char c = scan_next(&scanner); 291 &current,
209 switch (c) { 292 cstr("malformed number: binary/hex numbers can't be signed"));
210 case ';': { 293 }
211 while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} 294 }
295 if (str_has_prefix(scanner->str, cstr("0b"))) {
296 scan_next(scanner);
297 scan_next(scanner);
298 while (scan_has_next(scanner)) {
299 c = scan_peek(scanner);
300 if (c == '0' || c == '1' || c == '_') {
301 scan_next(scanner);
212 continue; 302 continue;
213 } break; 303 }
214 case '"': { 304 if (scan_is_valid_split(c)) {
215 char prev = c; 305 return emit_token(current, scanner, TOK_NUMBER);
216 bool found = false; 306 }
217 size_t n = 0; 307 scan_skip_until_valid(scanner);
218 while (scan_has_next(&scanner)) { 308 return emit_token_err(
219 c = scan_next(&scanner); 309 &current, cstr("malformed number: invalid binary number"));
220 if (c == '"' && prev != '\\') { 310 }
221 found = true; 311 } else if (str_has_prefix(scanner->str, cstr("0x"))) {
222 break; 312 scan_next(scanner);
223 } 313 scan_next(scanner);
224 prev = c; 314 while (scan_has_next(scanner)) {
225 n++; 315 c = scan_peek(scanner);
226 } 316 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
227 if (!found) { 317 (c >= 'A' && c <= 'F') || c == '_') {
228 push_error(ERR_TYPE_LEXER, ERR_UNMATCHED_STRING, line, col); 318 scan_next(scanner);
229 return tokens; 319 continue;
230 } 320 }
231 token.value = (StringView){ 321 if (scan_is_valid_split(c)) {
232 .start = &sv->start[offset + 1], 322 return emit_token(current, scanner, TOK_NUMBER);
233 .n = n, 323 }
234 }; 324 scan_skip_until_valid(scanner);
235 token.type = TOKEN_STRING; 325 return emit_token_err(&current,
236 } break; 326 cstr("malformed number: invalid hex number"));
237 case '(': { token.type = TOKEN_LPAREN; } break; 327 }
238 case ')': { token.type = TOKEN_RPAREN; } break; 328 } else {
239 case '[': { token.type = TOKEN_LSQUARE; } break; 329 // Integral.
240 case ']': { token.type = TOKEN_RSQUARE; } break; 330 while (scan_has_next(scanner)) {
241 case '{': { token.type = TOKEN_LCURLY; } break; 331 c = scan_peek(scanner);
242 case '}': { token.type = TOKEN_RCURLY; } break; 332 if (c == '.') {
243 case ':': { token.type = TOKEN_COLON; } break; 333 scan_next(scanner);
244 case '.': { token.type = TOKEN_DOT; } break; 334 break;
245 case '@': { token.type = TOKEN_AT; } break; 335 }
246 default: { 336 if ((c >= '0' && c <= '9') || c == '_') {
247 if (c == EOF || c == '\0') { 337 scan_next(scanner);
248 token.type = TOKEN_EOF; 338 continue;
249 break; 339 }
250 } 340 if (scan_is_valid_split(c)) {
251 size_t n = 1; 341 return emit_token(current, scanner, TOK_NUMBER);
252 bool num = c == '-' && !is_delimiter(scan_peek(&scanner)); 342 }
253 num = num || (c == '+' && !is_delimiter(scan_peek(&scanner))); 343 scan_skip_until_valid(scanner);
254 num = num || (c >= '0' && c <= '9'); 344 return emit_token_err(&current, cstr("malformed number"));
255 if (num) { 345 }
256 while (!is_delimiter(scan_peek(&scanner))) { 346 c = scan_peek(scanner);
257 c = scan_next(&scanner); 347 if (!(c >= '0' && c <= '9')) {
258 n++; 348 return emit_token_err(&current,
259 } 349 cstr("malformed number: no decimal digits"));
260 token.value = (StringView){ 350 }
261 .start = &sv->start[offset], 351 // Decimals.
262 .n = n, 352 while (scan_has_next(scanner)) {
263 }; 353 c = scan_peek(scanner);
264 token.type = TOKEN_NUMBER; 354 if (c == 'e' || c == 'E') {
265 } else { 355 scan_next(scanner);
266 while (!is_delimiter(scan_peek(&scanner))) { 356 break;
267 if (scan_peek(&scanner) == '.') { 357 }
268 break; 358 if ((c >= '0' && c <= '9') || c == '_') {
269 } 359 scan_next(scanner);
270 c = scan_next(&scanner); 360 continue;
271 n++; 361 }
272 } 362 if (scan_is_valid_split(c)) {
273 token.value = (StringView){ 363 return emit_token(current, scanner, TOK_NUMBER);
274 .start = &sv->start[offset], 364 }
275 .n = n, 365 scan_skip_until_valid(scanner);
276 }; 366 return emit_token_err(&current, cstr("malformed number"));
277 token.type = find_token_type(token.value);
278 }
279 } break;
280 } 367 }
281 if (token.type == TOKEN_UNKNOWN) { 368 // Exponent.
282 push_error(ERR_TYPE_LEXER, ERR_UNKNOWN_TOK_TYPE, line, col); 369 c = scan_peek(scanner);
283 return tokens; 370 if (c == '+' || c == '-') {
371 scan_next(scanner);
284 } 372 }
285 array_push(tokens, token); 373 while (scan_has_next(scanner)) {
374 c = scan_peek(scanner);
375 if ((c >= '0' && c <= '9') || c == '_') {
376 scan_next(scanner);
377 continue;
378 }
379 if (c == '.') {
380 scan_next(scanner);
381 return emit_token_err(
382 &current,
383 cstr("malformed number: decimals not allowed on exponent"));
384 }
385 if (scan_is_valid_split(c)) {
386 return emit_token(current, scanner, TOK_NUMBER);
387 }
388 scan_skip_until_valid(scanner);
389 return emit_token_err(&current, cstr("malformed number"));
390 }
391 }
392 return emit_token_err(&current, cstr("malformed number"));
393}
394
395Token
396scan_token(Scanner *scanner) {
397 assert(scanner);
398
399 scan_skip_whitespace(scanner);
400 if (!scan_has_next(scanner)) {
401 return emit_token(*scanner, scanner, TOK_EOF);
402 }
403
404 Scanner current = *scanner;
405 char c = scan_next(scanner);
406 switch (c) {
407 case '(':
408 return emit_token(current, scanner, TOK_LPAREN);
409 case ')':
410 return emit_token(current, scanner, TOK_RPAREN);
411 case '[':
412 return emit_token(current, scanner, TOK_LSQUARE);
413 case ']':
414 return emit_token(current, scanner, TOK_RSQUARE);
415 case '{':
416 return emit_token(current, scanner, TOK_LCURLY);
417 case '}':
418 return emit_token(current, scanner, TOK_RCURLY);
419 case '+': {
420 char p = scan_peek(scanner);
421 if (p >= '0' && p <= '9') {
422 *scanner = current;
423 return emit_token_number(scanner);
424 }
425 return emit_token(current, scanner, TOK_ADD);
426 };
427 case '-': {
428 char p = scan_peek(scanner);
429 if (p >= '0' && p <= '9') {
430 *scanner = current;
431 return emit_token_number(scanner);
432 }
433 return emit_token(current, scanner, TOK_ADD);
434 };
435 case '*':
436 return emit_token(current, scanner, TOK_MUL);
437 case '/':
438 return emit_token(current, scanner, TOK_DIV);
439 case '%':
440 return emit_token(current, scanner, TOK_MOD);
441 case '!': {
442 if (scan_peek(scanner) == '=') {
443 scan_next(scanner);
444 return emit_token(current, scanner, TOK_NOTEQ);
445 }
446 return emit_token(current, scanner, TOK_NOT);
447 };
448 case '=': {
449 if (scan_peek(scanner) == '=') {
450 scan_next(scanner);
451 return emit_token(current, scanner, TOK_EQ);
452 }
453 return emit_token(current, scanner, TOK_ASSIGN);
454 };
455 case '<': {
456 char p = scan_peek(scanner);
457 if (p == '=') {
458 scan_next(scanner);
459 return emit_token(current, scanner, TOK_LE);
460 }
461 if (p == '<') {
462 scan_next(scanner);
463 return emit_token(current, scanner, TOK_BITLSHIFT);
464 }
465 return emit_token(current, scanner, TOK_LT);
466 };
467 case '>': {
468 char p = scan_peek(scanner);
469 if (p == '=') {
470 scan_next(scanner);
471 return emit_token(current, scanner, TOK_GE);
472 }
473 if (p == '>') {
474 scan_next(scanner);
475 return emit_token(current, scanner, TOK_BITRSHIFT);
476 }
477 return emit_token(current, scanner, TOK_GT);
478 };
479 case '~':
480 return emit_token(current, scanner, TOK_BITNOT);
481 case '&': {
482 if (scan_peek(scanner) == '&') {
483 scan_next(scanner);
484 return emit_token(current, scanner, TOK_AND);
485 }
486 return emit_token(current, scanner, TOK_BITAND);
487 };
488 case '|': {
489 if (scan_peek(scanner) == '|') {
490 scan_next(scanner);
491 return emit_token(current, scanner, TOK_OR);
492 }
493 return emit_token(current, scanner, TOK_BITOR);
494 };
495 case ':':
496 return emit_token(current, scanner, TOK_COLON);
497 case '.':
498 return emit_token(current, scanner, TOK_DOT);
499 case '@':
500 return emit_token(current, scanner, TOK_AT);
501 case '"': {
502 while (scan_has_next(scanner)) {
503 c = scan_next(scanner);
504 if (c == '\\') {
505 scan_next(scanner);
506 continue;
507 }
508 if (c == '"') {
509 return emit_token(current, scanner, TOK_STRING);
510 }
511 }
512 return emit_token_err(&current, cstr("mismatched string quotes"));
513 };
514 }
515 if (c >= '0' && c <= '9') {
516 *scanner = current;
517 return emit_token_number(scanner);
518 }
519
520 scan_skip_until_valid(scanner);
521 Str val = current.str;
522 val.size = current.str.size - scanner->str.size;
523 val.size = val.size < 0 ? 0 : val.size;
524 if (val.size == 0) {
525 return emit_token_err(&current, cstr("unexpected character"));
526 }
527 switch (val.mem[0]) {
528 case 'b': {
529 if (str_has_prefix(val, cstr("break"))) {
530 return emit_token(current, scanner, TOK_BREAK);
531 }
532 } break;
533 case 'c': {
534 if (str_has_prefix(val, cstr("case"))) {
535 return emit_token(current, scanner, TOK_CASE);
536 }
537 if (str_has_prefix(val, cstr("continue"))) {
538 return emit_token(current, scanner, TOK_CONTINUE);
539 }
540 } break;
541 case 'f': {
542 if (str_has_prefix(val, cstr("false"))) {
543 return emit_token(current, scanner, TOK_FALSE);
544 }
545 if (str_has_prefix(val, cstr("fun"))) {
546 return emit_token(current, scanner, TOK_FUN);
547 }
548 } break;
549 case 'i': {
550 if (str_has_prefix(val, cstr("if"))) {
551 return emit_token(current, scanner, TOK_IF);
552 }
553 } break;
554 case 'l': {
555 if (str_has_prefix(val, cstr("let"))) {
556 return emit_token(current, scanner, TOK_LET);
557 }
558 } break;
559 case 'm': {
560 if (str_has_prefix(val, cstr("match"))) {
561 return emit_token(current, scanner, TOK_MATCH);
562 }
563 } break;
564 case 'n': {
565 if (str_has_prefix(val, cstr("nil"))) {
566 return emit_token(current, scanner, TOK_NIL);
567 }
568 } break;
569 case 'r': {
570 if (str_has_prefix(val, cstr("return"))) {
571 return emit_token(current, scanner, TOK_RETURN);
572 }
573 } break;
574 case 's': {
575 if (str_has_prefix(val, cstr("set"))) {
576 return emit_token(current, scanner, TOK_SET);
577 }
578 if (str_has_prefix(val, cstr("struct"))) {
579 return emit_token(current, scanner, TOK_STRUCT);
580 }
581 } break;
582 case 't': {
583 if (str_has_prefix(val, cstr("true"))) {
584 return emit_token(current, scanner, TOK_TRUE);
585 }
586 } break;
587 case 'w': {
588 if (str_has_prefix(val, cstr("while"))) {
589 return emit_token(current, scanner, TOK_WHILE);
590 }
591 } break;
286 } 592 }
287 return tokens; 593 return emit_token(current, scanner, TOK_SYMBOL);
288} 594}
diff --git a/src/lexer.h b/src/lexer.h
deleted file mode 100644
index 949abaf..0000000
--- a/src/lexer.h
+++ /dev/null
@@ -1,99 +0,0 @@
-#ifndef BDL_LEXER_H
-#define BDL_LEXER_H
-
-#include "string_view.h"
-
-typedef enum TokenType {
-    TOKEN_UNKNOWN = 0,
-
-    // Parentheses.
-    TOKEN_LPAREN,
-    TOKEN_RPAREN,
-    TOKEN_LSQUARE,
-    TOKEN_RSQUARE,
-    TOKEN_LCURLY,
-    TOKEN_RCURLY,
-
-    // Primitive types.
-    TOKEN_NUMBER,
-    TOKEN_SYMBOL,
-    TOKEN_STRING,
-    TOKEN_NIL,
-    TOKEN_TRUE,
-    TOKEN_FALSE,
-
-    // Keywords.
-    TOKEN_LAMBDA,
-    TOKEN_IF,
-    TOKEN_DEF,
-    TOKEN_SET,
-    TOKEN_FUN,
-    TOKEN_STRUCT,
-
-    // Arithmetic ops.
-    TOKEN_ADD,
-    TOKEN_SUB,
-    TOKEN_MUL,
-    TOKEN_DIV,
-    TOKEN_MOD,
-
-    // Boolean operations.
-    TOKEN_NOT,
-    TOKEN_AND,
-    TOKEN_OR,
-    TOKEN_EQ,
-    TOKEN_LT,
-    TOKEN_GT,
-    TOKEN_LE,
-    TOKEN_GE,
-
-    // Special operators.
-    TOKEN_COLON,
-    TOKEN_DOT,
-    TOKEN_AT,
-
-    // End of file.
-    TOKEN_EOF,
-} TokenType;
-
-typedef struct Token {
-    TokenType type;
-    StringView value;
-    size_t line;
-    size_t col;
-} Token;
-
-typedef struct Scanner {
-    StringView current;
-    size_t line_number;
-    size_t col_number;
-    size_t offset;
-} Scanner;
-
-// Print a token to standard output for debugging purposes.
-void print_token(Token tok);
-
-// Same functionality as with StringView, but keeping track of line and column
-// numbers.
-char scan_next(Scanner *scanner);
-char scan_peek(const Scanner *scanner);
-
-// Check if the current scanner still have characters left.
-bool scan_has_next(const Scanner *scanner);
-
-// Advance the scanner until we ran out of whitespace.
-void skip_whitespace(Scanner *scanner);
-
-// Check if a given character is a delimiter.
-bool is_delimiter(char c);
-
-// Extract the token type from the current string.
-TokenType find_token_type(const StringView value);
-
-// Generate a list of tokens from the given string.
-Token * tokenize(const StringView *sv);
-
-// Display tokens from token list.
-void print_tokens(Token *tokens);
-
-#endif // BDL_LEXER_H
diff --git a/src/main.c b/src/main.c
index edd70aa..9848b8b 100644
--- a/src/main.c
+++ b/src/main.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 
 #include "badlib.h"
+#include "lexer.c"
 
 typedef enum ExecMode {
     RUN_NORMAL,
@@ -14,607 +15,11 @@ typedef enum ExecMode {
14 15
15static ExecMode mode = RUN_NORMAL; 16static ExecMode mode = RUN_NORMAL;
16 17
17#define LEXER_MEM GB(2)
18
19void 18void
20init(void) { 19init(void) {
21 log_init_default(); 20 log_init_default();
22} 21}
23 22
24typedef enum TokenType {
25 TOK_UNKNOWN = 0,
26
27 // Parentheses.
28 TOK_LPAREN, // (
29 TOK_RPAREN, // )
30 TOK_LSQUARE, // [
31 TOK_RSQUARE, // ]
32 TOK_LCURLY, // {
33 TOK_RCURLY, // }
34
35 // Basic literals.
36 TOK_NUMBER,
37 TOK_SYMBOL,
38 TOK_STRING,
39
40 // Keywords.
41 TOK_BREAK, // break
42 TOK_CASE, // case
43 TOK_CONTINUE, // continue
44 TOK_FALSE, // false
45 TOK_FUN, // fun
46 TOK_IF, // if
47 TOK_LET, // let
48 TOK_MATCH, // match
49 TOK_NIL, // nil
50 TOK_RETURN, // return
51 TOK_SET, // set
52 TOK_STRUCT, // struct
53 TOK_TRUE, // true
54 TOK_WHILE, // while
55
56 // Arithmetic ops.
57 TOK_ADD, // +
58 TOK_SUB, // -
59 TOK_MUL, // *
60 TOK_DIV, // /
61 TOK_MOD, // %
62
63 // Logical ops.
64 TOK_NOT, // !
65 TOK_AND, // &&
66 TOK_OR, // ||
67 TOK_EQ, // ==
68 TOK_NOTEQ, // !=
69 TOK_LT, // <
70 TOK_GT, // >
71 TOK_LE, // <=
72 TOK_GE, // >=
73
74 // Bitwise ops.
75 TOK_BITNOT, // ~
76 TOK_BITAND, // &
77 TOK_BITOR, // |
78 TOK_BITLSHIFT, // <<
79 TOK_BITRSHIFT, // >>
80
81 // Special ops.
82 TOK_COLON, // :
83 TOK_DOT, // .
84 TOK_AT, // @
85 TOK_ASSIGN, // =
86
87 // End of file.
88 TOK_EOF,
89} TokenType;
90
91Str token_str[] = {
92 [TOK_UNKNOWN] = cstr("UNKNOWN"),
93
94 // Parentheses.
95 [TOK_LPAREN] = cstr("LPAREN"),
96 [TOK_RPAREN] = cstr("RPAREN"),
97 [TOK_LSQUARE] = cstr("LSQUARE"),
98 [TOK_RSQUARE] = cstr("RSQUARE"),
99 [TOK_LCURLY] = cstr("LCURLY"),
100 [TOK_RCURLY] = cstr("RCURLY"),
101
102 // Basic literals.
103 [TOK_NUMBER] = cstr("NUMBER"),
104 [TOK_SYMBOL] = cstr("SYMBOL"),
105 [TOK_STRING] = cstr("STRING"),
106
107 // Keywords.
108 [TOK_BREAK] = cstr("BREAK"),
109 [TOK_CASE] = cstr("CASE"),
110 [TOK_CONTINUE] = cstr("CONTINUE"),
111 [TOK_FALSE] = cstr("FALSE"),
112 [TOK_FUN] = cstr("FUN"),
113 [TOK_IF] = cstr("IF"),
114 [TOK_LET] = cstr("LET"),
115 [TOK_MATCH] = cstr("MATCH"),
116 [TOK_NIL] = cstr("NIL"),
117 [TOK_RETURN] = cstr("RETURN"),
118 [TOK_SET] = cstr("SET"),
119 [TOK_STRUCT] = cstr("STRUCT"),
120 [TOK_TRUE] = cstr("TRUE"),
121 [TOK_WHILE] = cstr("WHILE"),
122
123 // Arithmetic ops.
124 [TOK_ADD] = cstr("ADD"),
125 [TOK_SUB] = cstr("SUB"),
126 [TOK_MUL] = cstr("MUL"),
127 [TOK_DIV] = cstr("DIV"),
128 [TOK_MOD] = cstr("MOD"),
129
130 // Logical ops.
131 [TOK_NOT] = cstr("NOT"),
132 [TOK_AND] = cstr("AND"),
133 [TOK_OR] = cstr("OR"),
134 [TOK_EQ] = cstr("EQ"),
135 [TOK_NOTEQ] = cstr("NOTEQ"),
136 [TOK_LT] = cstr("LT"),
137 [TOK_GT] = cstr("GT"),
138 [TOK_LE] = cstr("LE"),
139 [TOK_GE] = cstr("GE"),
140
141 // Bitwise ops.
142 [TOK_BITNOT] = cstr("BITNOT"),
143 [TOK_BITAND] = cstr("BITAND"),
144 [TOK_BITOR] = cstr("BITOR"),
145 [TOK_BITLSHIFT] = cstr("BITLSHIFT"),
146 [TOK_BITRSHIFT] = cstr("BITRSHIFT"),
147
148 // Special ops.
149 [TOK_COLON] = cstr("COLON"),
150 [TOK_DOT] = cstr("DOT"),
151 [TOK_AT] = cstr("AT"),
152 [TOK_ASSIGN] = cstr("ASSIGN"),
153
154 // End of file.
155 [TOK_EOF] = cstr("EOF"),
156};
157
158typedef struct Token {
159 TokenType type;
160 Str val;
161 sz line;
162 sz col;
163} Token;
164
165typedef struct Scanner {
166 Str str;
167 sz line;
168 sz col;
169 Arena *storage;
170} Scanner;
171
172char
173scan_next(Scanner *scanner) {
174 char c = str_next(&scanner->str);
175 if (c == '\n') {
176 scanner->line++;
177 scanner->col = 0;
178 } else {
179 scanner->col++;
180 }
181 return c;
182}
183
184bool
185scan_has_next(Scanner *scanner) {
186 return scanner->str.size;
187}
188
189char
190scan_peek(Scanner *scanner) {
191 return str_peek(scanner->str);
192}
193
194Token
195emit_token(Scanner current, Scanner *scanner, TokenType t) {
196 Str val = current.str;
197 val.size = current.str.size - scanner->str.size;
198 val.size = val.size < 0 ? 0 : val.size;
199 return (Token){
200 .val = val,
201 .line = current.line + 1,
202 .col = current.col + 1,
203 .type = t,
204 };
205}
206
207Token
208emit_token_err(Scanner *scanner, Str err_msg) {
209 return (Token){
210 .line = scanner->line + 1,
211 .col = scanner->col + 1,
212 .val = err_msg,
213 .type = TOK_UNKNOWN,
214 };
215}
216
217void
218scan_skip_line(Scanner *scanner) {
219 SearchResult newline = array_find_next(scanner->str, cstr("\n"));
220 if (newline.found) {
221 scanner->str.mem += newline.pos + 1;
222 scanner->str.size -= newline.pos + 1;
223 scanner->line++;
224 scanner->col = 0;
225 }
226}
227
228void
229scan_skip_whitespace(Scanner *scanner) {
230 while (scan_has_next(scanner)) {
231 char c = scan_peek(scanner);
232 switch (c) {
233 case ' ':
234 case ',': // Commas are just syntactic sugar.
235 case '\f':
236 case '\n':
237 case '\r':
238 case '\t':
239 case '\v': {
240 scan_next(scanner);
241 } break;
242 case ';': {
243 // Found a comment! (skip)
244 scan_skip_line(scanner);
245 } break;
246 default: {
247 return;
248 } break;
249 }
250 }
251}
252
253bool
254is_valid_split(char c) {
255 switch (c) {
256 case ';':
257 case '(':
258 case ')':
259 case '[':
260 case ']':
261 case '{':
262 case '}':
263 case '+':
264 case '-':
265 case '*':
266 case '/':
267 case '%':
268 case '!':
269 case '=':
270 case '<':
271 case '>':
272 case '~':
273 case '&':
274 case '|':
275 case ':':
276 case '.':
277 case '@':
278 case '"':
279 case ' ':
280 case ',':
281 case '\f':
282 case '\n':
283 case '\r':
284 case '\t':
285 case '\v': {
286 return true;
287 } break;
288 }
289 return false;
290}
291
292void
293scan_skip_until_valid(Scanner *scanner) {
294 while (scan_has_next(scanner)) {
295 char c = scan_peek(scanner);
296 if (is_valid_split(c)) {
297 return;
298 }
299 scan_next(scanner);
300 }
301}
302
303Token
304emit_token_number(Scanner *scanner) {
305 Scanner current = *scanner;
306 char c = scan_peek(scanner);
307 if (c == '+' || c == '-') {
308 scan_next(scanner);
309 if (str_has_prefix(scanner->str, cstr("0b")) ||
310 str_has_prefix(scanner->str, cstr("0x"))) {
311 scan_skip_until_valid(scanner);
312 return emit_token_err(
313 &current,
314 cstr("malformed number: binary/hex numbers can't be signed"));
315 }
316 }
317 if (str_has_prefix(scanner->str, cstr("0b"))) {
318 scan_next(scanner);
319 scan_next(scanner);
320 while (scan_has_next(scanner)) {
321 c = scan_peek(scanner);
322 if (c == '0' || c == '1' || c == '_') {
323 scan_next(scanner);
324 continue;
325 }
326 if (is_valid_split(c)) {
327 return emit_token(current, scanner, TOK_NUMBER);
328 }
329 scan_skip_until_valid(scanner);
330 return emit_token_err(
331 &current, cstr("malformed number: invalid binary number"));
332 }
333 } else if (str_has_prefix(scanner->str, cstr("0x"))) {
334 scan_next(scanner);
335 scan_next(scanner);
336 while (scan_has_next(scanner)) {
337 c = scan_peek(scanner);
338 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
339 (c >= 'A' && c <= 'F') || c == '_') {
340 scan_next(scanner);
341 continue;
342 }
343 if (is_valid_split(c)) {
344 return emit_token(current, scanner, TOK_NUMBER);
345 }
346 scan_skip_until_valid(scanner);
347 return emit_token_err(&current,
348 cstr("malformed number: invalid hex number"));
349 }
350 } else {
351 // Integral.
352 while (scan_has_next(scanner)) {
353 c = scan_peek(scanner);
354 if (c == '.') {
355 scan_next(scanner);
356 break;
357 }
358 if ((c >= '0' && c <= '9') || c == '_') {
359 scan_next(scanner);
360 continue;
361 }
362 if (is_valid_split(c)) {
363 return emit_token(current, scanner, TOK_NUMBER);
364 }
365 scan_skip_until_valid(scanner);
366 return emit_token_err(&current, cstr("malformed number"));
367 }
368 c = scan_peek(scanner);
369 if (!(c >= '0' && c <= '9')) {
370 return emit_token_err(&current,
371 cstr("malformed number: no decimal digits"));
372 }
373 // Decimals.
374 while (scan_has_next(scanner)) {
375 c = scan_peek(scanner);
376 if (c == 'e' || c == 'E') {
377 scan_next(scanner);
378 break;
379 }
380 if ((c >= '0' && c <= '9') || c == '_') {
381 scan_next(scanner);
382 continue;
383 }
384 if (is_valid_split(c)) {
385 return emit_token(current, scanner, TOK_NUMBER);
386 }
387 scan_skip_until_valid(scanner);
388 return emit_token_err(&current, cstr("malformed number"));
389 }
390 // Exponent.
391 c = scan_peek(scanner);
392 if (c == '+' || c == '-') {
393 scan_next(scanner);
394 }
395 while (scan_has_next(scanner)) {
396 c = scan_peek(scanner);
397 if ((c >= '0' && c <= '9') || c == '_') {
398 scan_next(scanner);
399 continue;
400 }
401 if (c == '.') {
402 scan_next(scanner);
403 return emit_token_err(
404 &current,
405 cstr("malformed number: decimals not allowed on exponent"));
406 }
407 if (is_valid_split(c)) {
408 return emit_token(current, scanner, TOK_NUMBER);
409 }
410 scan_skip_until_valid(scanner);
411 return emit_token_err(&current, cstr("malformed number"));
412 }
413 }
414 return emit_token_err(&current, cstr("malformed number"));
415}
416
417Token
418scan_token(Scanner *scanner) {
419 assert(scanner);
420
421 scan_skip_whitespace(scanner);
422 if (!scan_has_next(scanner)) {
423 return emit_token(*scanner, scanner, TOK_EOF);
424 }
425
426 Scanner current = *scanner;
427 char c = scan_next(scanner);
428 switch (c) {
429 case '(':
430 return emit_token(current, scanner, TOK_LPAREN);
431 case ')':
432 return emit_token(current, scanner, TOK_RPAREN);
433 case '[':
434 return emit_token(current, scanner, TOK_LSQUARE);
435 case ']':
436 return emit_token(current, scanner, TOK_RSQUARE);
437 case '{':
438 return emit_token(current, scanner, TOK_LCURLY);
439 case '}':
440 return emit_token(current, scanner, TOK_RCURLY);
441 case '+': {
442 char p = scan_peek(scanner);
443 if (p >= '0' && p <= '9') {
444 *scanner = current;
445 return emit_token_number(scanner);
446 }
447 return emit_token(current, scanner, TOK_ADD);
448 };
449 case '-': {
450 char p = scan_peek(scanner);
451 if (p >= '0' && p <= '9') {
452 *scanner = current;
453 return emit_token_number(scanner);
454 }
455 return emit_token(current, scanner, TOK_ADD);
456 };
457 case '*':
458 return emit_token(current, scanner, TOK_MUL);
459 case '/':
460 return emit_token(current, scanner, TOK_DIV);
461 case '%':
462 return emit_token(current, scanner, TOK_MOD);
463 case '!': {
464 if (scan_peek(scanner) == '=') {
465 scan_next(scanner);
466 return emit_token(current, scanner, TOK_NOTEQ);
467 }
468 return emit_token(current, scanner, TOK_NOT);
469 };
470 case '=': {
471 if (scan_peek(scanner) == '=') {
472 scan_next(scanner);
473 return emit_token(current, scanner, TOK_EQ);
474 }
475 return emit_token(current, scanner, TOK_ASSIGN);
476 };
477 case '<': {
478 char p = scan_peek(scanner);
479 if (p == '=') {
480 scan_next(scanner);
481 return emit_token(current, scanner, TOK_LE);
482 }
483 if (p == '<') {
484 scan_next(scanner);
485 return emit_token(current, scanner, TOK_BITLSHIFT);
486 }
487 return emit_token(current, scanner, TOK_LT);
488 };
489 case '>': {
490 char p = scan_peek(scanner);
491 if (p == '=') {
492 scan_next(scanner);
493 return emit_token(current, scanner, TOK_GE);
494 }
495 if (p == '>') {
496 scan_next(scanner);
497 return emit_token(current, scanner, TOK_BITRSHIFT);
498 }
499 return emit_token(current, scanner, TOK_GT);
500 };
501 case '~':
502 return emit_token(current, scanner, TOK_BITNOT);
503 case '&': {
504 if (scan_peek(scanner) == '&') {
505 scan_next(scanner);
506 return emit_token(current, scanner, TOK_AND);
507 }
508 return emit_token(current, scanner, TOK_BITAND);
509 };
510 case '|': {
511 if (scan_peek(scanner) == '|') {
512 scan_next(scanner);
513 return emit_token(current, scanner, TOK_OR);
514 }
515 return emit_token(current, scanner, TOK_BITOR);
516 };
517 case ':':
518 return emit_token(current, scanner, TOK_COLON);
519 case '.':
520 return emit_token(current, scanner, TOK_DOT);
521 case '@':
522 return emit_token(current, scanner, TOK_AT);
523 case '"': {
524 while (scan_has_next(scanner)) {
525 c = scan_next(scanner);
526 if (c == '\\') {
527 scan_next(scanner);
528 continue;
529 }
530 if (c == '"') {
531 return emit_token(current, scanner, TOK_STRING);
532 }
533 }
534 return emit_token_err(&current, cstr("mismatched string quotes"));
535 };
536 }
537 if (c >= '0' && c <= '9') {
538 *scanner = current;
539 return emit_token_number(scanner);
540 }
541
542 scan_skip_until_valid(scanner);
543 Str val = current.str;
544 val.size = current.str.size - scanner->str.size;
545 val.size = val.size < 0 ? 0 : val.size;
546 if (val.size == 0) {
547 return emit_token_err(&current, cstr("unexpected character"));
548 }
549 switch (val.mem[0]) {
550 case 'b': {
551 if (str_has_prefix(val, cstr("break"))) {
552 return emit_token(current, scanner, TOK_BREAK);
553 }
554 } break;
555 case 'c': {
556 if (str_has_prefix(val, cstr("case"))) {
557 return emit_token(current, scanner, TOK_CASE);
558 }
559 if (str_has_prefix(val, cstr("continue"))) {
560 return emit_token(current, scanner, TOK_CONTINUE);
561 }
562 } break;
563 case 'f': {
564 if (str_has_prefix(val, cstr("false"))) {
565 return emit_token(current, scanner, TOK_FALSE);
566 }
567 if (str_has_prefix(val, cstr("fun"))) {
568 return emit_token(current, scanner, TOK_FUN);
569 }
570 } break;
571 case 'i': {
572 if (str_has_prefix(val, cstr("if"))) {
573 return emit_token(current, scanner, TOK_IF);
574 }
575 } break;
576 case 'l': {
577 if (str_has_prefix(val, cstr("let"))) {
578 return emit_token(current, scanner, TOK_LET);
579 }
580 } break;
581 case 'm': {
582 if (str_has_prefix(val, cstr("match"))) {
583 return emit_token(current, scanner, TOK_MATCH);
584 }
585 } break;
586 case 'n': {
587 if (str_has_prefix(val, cstr("nil"))) {
588 return emit_token(current, scanner, TOK_NIL);
589 }
590 } break;
591 case 'r': {
592 if (str_has_prefix(val, cstr("return"))) {
593 return emit_token(current, scanner, TOK_RETURN);
594 }
595 } break;
596 case 's': {
597 if (str_has_prefix(val, cstr("set"))) {
598 return emit_token(current, scanner, TOK_SET);
599 }
600 if (str_has_prefix(val, cstr("struct"))) {
601 return emit_token(current, scanner, TOK_STRUCT);
602 }
603 } break;
604 case 't': {
605 if (str_has_prefix(val, cstr("true"))) {
606 return emit_token(current, scanner, TOK_TRUE);
607 }
608 } break;
609 case 'w': {
610 if (str_has_prefix(val, cstr("while"))) {
611 return emit_token(current, scanner, TOK_WHILE);
612 }
613 } break;
614 }
615 return emit_token(current, scanner, TOK_SYMBOL);
616}
617
618void 23void
619process_file(Str path) { 24process_file(Str path) {
620 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); 25 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
@@ -628,36 +33,24 @@ process_file(Str path) {
 
     Scanner scanner = {
         .str = file.data,
-        .storage = &lexer_arena,
     };
     Token tok = {0};
+    sz errors = 0;
     while (tok.type != TOK_EOF) {
         tok = scan_token(&scanner);
-        eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type],
-                 tok.val);
+        if (tok.type == TOK_UNKNOWN) {
+            eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col,
+                     token_str[tok.type], tok.val);
+            errors++;
+        }
     }
-    // while (true) {
-    //     Token tok = scan_token(&scanner);
-    //     println("%s:%d:%d:%s %s", path, tok.line, tok.col,
-    //             token_str[tok.type],
-    //             tok.val);
-    //     if (tok.type == TOK_EOF) break;
-    // }
 
-    // Str scanner = file.data;
-    // // NOTE: Testing file read line by line.
-    // for (sz i = 0; scanner.size != 0; i++) {
-    //     Str line = str_split(&scanner, cstr("\n"));
-    //     println("%x{4} %s", i + 1, line);
-    // }
-
-    // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345,
-    //         cstr("BOOM!"), &logger_inf.storage);
-
-    // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size});
-    // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string
-    // logger"), 'X'); while (true) {}
-    // TODO: run lexer.
+    // Only proceed if there are no errors.
+    if (errors) {
+        goto stop;
+    }
+
+stop:
     // Free up resources.
     arena_destroy(&lexer_arena, os_allocator);
 }
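
For reference, below is a minimal sketch of how the relocated lexer can be driven after this change, mirroring the process_file loop in the diff above. It is not part of the commit: the standalone main(), the hard-coded input string, and the printf-based output are illustrative assumptions. Scanner, Token, scan_token(), token_str[], TOK_EOF, Str (.mem/.size), and cstr() are used exactly as they appear in the diff, and badlib.h is assumed to provide the string helpers as in src/main.c.

#include <stdio.h>

#include "badlib.h"
#include "lexer.c"

int
main(void) {
    // Scan a small hard-coded snippet and dump one token per line.
    Scanner scanner = {
        .str = cstr("(fun add (a b) { return (+ a b) })"),
    };
    Token tok = {0};
    while (tok.type != TOK_EOF) {
        tok = scan_token(&scanner);
        // token_str[] entries and tok.val are badlib Str values (mem + size),
        // not NUL-terminated C strings, hence the %.*s format.
        printf("%ld:%ld %.*s %.*s\n", (long)tok.line, (long)tok.col,
               (int)token_str[tok.type].size, (char *)token_str[tok.type].mem,
               (int)tok.val.size, (char *)tok.val.mem);
    }
    return 0;
}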