about | summary | refs | log | tree | commit | diff | stats
path: root/src/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer.c')
-rw-r--r-- src/lexer.c | 734
1 files changed, 520 insertions, 214 deletions
diff --git a/src/lexer.c b/src/lexer.c
index a6d7c74..df998f2 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,130 +1,192 @@
1#include "lexer.h" 1#define LEXER_MEM GB(2)
2#include "errors.h" 2
// Every kind of token the lexer can produce. Values are assigned
// sequentially starting at TOK_UNKNOWN = 0, and the enumerators double as
// indices into the token_str name table, so their order must not change.
// TOK_UNKNOWN is also the type carried by error tokens.
typedef enum TokenType {
    TOK_UNKNOWN = 0,

    // Parentheses.
    TOK_LPAREN,  // (
    TOK_RPAREN,  // )
    TOK_LSQUARE, // [
    TOK_RSQUARE, // ]
    TOK_LCURLY,  // {
    TOK_RCURLY,  // }

    // Basic literals.
    TOK_NUMBER,
    TOK_SYMBOL,
    TOK_STRING,

    // Keywords.
    TOK_BREAK,    // break
    TOK_CASE,     // case
    TOK_CONTINUE, // continue
    TOK_FALSE,    // false
    TOK_FUN,      // fun
    TOK_IF,       // if
    TOK_LET,      // let
    TOK_MATCH,    // match
    TOK_NIL,      // nil
    TOK_RETURN,   // return
    TOK_SET,      // set
    TOK_STRUCT,   // struct
    TOK_TRUE,     // true
    TOK_WHILE,    // while

    // Arithmetic ops.
    TOK_ADD, // +
    TOK_SUB, // -
    TOK_MUL, // *
    TOK_DIV, // /
    TOK_MOD, // %

    // Logical ops.
    TOK_NOT,   // !
    TOK_AND,   // &&
    TOK_OR,    // ||
    TOK_EQ,    // ==
    TOK_NOTEQ, // !=
    TOK_LT,    // <
    TOK_GT,    // >
    TOK_LE,    // <=
    TOK_GE,    // >=

    // Bitwise ops.
    TOK_BITNOT,    // ~
    TOK_BITAND,    // &
    TOK_BITOR,     // |
    TOK_BITLSHIFT, // <<
    TOK_BITRSHIFT, // >>

    // Special ops.
    TOK_COLON,  // :
    TOK_DOT,    // .
    TOK_AT,     // @
    TOK_ASSIGN, // =

    // End of file.
    TOK_EOF,
} TokenType;
69
70Str token_str[] = {
71 [TOK_UNKNOWN] = cstr("UNKNOWN"),
72
73 // Parentheses.
74 [TOK_LPAREN] = cstr("LPAREN"),
75 [TOK_RPAREN] = cstr("RPAREN"),
76 [TOK_LSQUARE] = cstr("LSQUARE"),
77 [TOK_RSQUARE] = cstr("RSQUARE"),
78 [TOK_LCURLY] = cstr("LCURLY"),
79 [TOK_RCURLY] = cstr("RCURLY"),
80
81 // Basic literals.
82 [TOK_NUMBER] = cstr("NUMBER"),
83 [TOK_SYMBOL] = cstr("SYMBOL"),
84 [TOK_STRING] = cstr("STRING"),
85
86 // Keywords.
87 [TOK_BREAK] = cstr("BREAK"),
88 [TOK_CASE] = cstr("CASE"),
89 [TOK_CONTINUE] = cstr("CONTINUE"),
90 [TOK_FALSE] = cstr("FALSE"),
91 [TOK_FUN] = cstr("FUN"),
92 [TOK_IF] = cstr("IF"),
93 [TOK_LET] = cstr("LET"),
94 [TOK_MATCH] = cstr("MATCH"),
95 [TOK_NIL] = cstr("NIL"),
96 [TOK_RETURN] = cstr("RETURN"),
97 [TOK_SET] = cstr("SET"),
98 [TOK_STRUCT] = cstr("STRUCT"),
99 [TOK_TRUE] = cstr("TRUE"),
100 [TOK_WHILE] = cstr("WHILE"),
101
102 // Arithmetic ops.
103 [TOK_ADD] = cstr("ADD"),
104 [TOK_SUB] = cstr("SUB"),
105 [TOK_MUL] = cstr("MUL"),
106 [TOK_DIV] = cstr("DIV"),
107 [TOK_MOD] = cstr("MOD"),
42 108
43typedef struct Keyword { 109 // Logical ops.
44 char *str; 110 [TOK_NOT] = cstr("NOT"),
45 size_t n; 111 [TOK_AND] = cstr("AND"),
46 TokenType token; 112 [TOK_OR] = cstr("OR"),
47} Keyword; 113 [TOK_EQ] = cstr("EQ"),
48 114 [TOK_NOTEQ] = cstr("NOTEQ"),
49#define KEYWORD(STR,TOK) {(STR), sizeof(STR) - 1, (TOK)} 115 [TOK_LT] = cstr("LT"),
50 116 [TOK_GT] = cstr("GT"),
51static const Keyword keywords[] = { 117 [TOK_LE] = cstr("LE"),
52 KEYWORD("nil", TOKEN_NIL), 118 [TOK_GE] = cstr("GE"),
53 KEYWORD("true", TOKEN_TRUE), 119
54 KEYWORD("false", TOKEN_FALSE), 120 // Bitwise ops.
55 KEYWORD("lambda", TOKEN_LAMBDA), 121 [TOK_BITNOT] = cstr("BITNOT"),
56 KEYWORD("if", TOKEN_IF), 122 [TOK_BITAND] = cstr("BITAND"),
57 KEYWORD("def", TOKEN_DEF), 123 [TOK_BITOR] = cstr("BITOR"),
58 KEYWORD("set", TOKEN_SET), 124 [TOK_BITLSHIFT] = cstr("BITLSHIFT"),
59 KEYWORD("fun", TOKEN_FUN), 125 [TOK_BITRSHIFT] = cstr("BITRSHIFT"),
60 KEYWORD("struct", TOKEN_STRUCT), 126
61 KEYWORD("+", TOKEN_ADD), 127 // Special ops.
62 KEYWORD("-", TOKEN_SUB), 128 [TOK_COLON] = cstr("COLON"),
63 KEYWORD("*", TOKEN_MUL), 129 [TOK_DOT] = cstr("DOT"),
64 KEYWORD("/", TOKEN_DIV), 130 [TOK_AT] = cstr("AT"),
65 KEYWORD("%", TOKEN_MOD), 131 [TOK_ASSIGN] = cstr("ASSIGN"),
66 KEYWORD("not", TOKEN_NOT), 132
67 KEYWORD("and", TOKEN_AND), 133 // End of file.
68 KEYWORD("or", TOKEN_OR), 134 [TOK_EOF] = cstr("EOF"),
69 KEYWORD("=", TOKEN_EQ),
70 KEYWORD("<", TOKEN_LT),
71 KEYWORD(">", TOKEN_GT),
72 KEYWORD("<=", TOKEN_LE),
73 KEYWORD(">=", TOKEN_GE),
74}; 135};
75 136
76void 137typedef struct Token {
77print_token(Token tok) { 138 TokenType type;
78 printf("[%4ld:%-4ld] ", tok.line, tok.col); 139 Str val;
79 printf("%s", token_str[tok.type]); 140 sz line;
80 switch (tok.type) { 141 sz col;
81 case TOKEN_NUMBER: 142} Token;
82 case TOKEN_SYMBOL: 143
83 case TOKEN_STRING: { 144typedef struct Scanner {
84 printf(" -> "); 145 Str str;
85 sv_write(&tok.value); 146 sz line;
86 } break; 147 sz col;
87 default: { 148} Scanner;
88 } break;
89 }
90 printf("\n");
91}
92 149
93char 150char
94scan_next(Scanner *scanner) { 151scan_next(Scanner *scanner) {
95 char c = sv_next(&scanner->current); 152 char c = str_next(&scanner->str);
96 if (c == '\n') { 153 if (c == '\n') {
97 scanner->line_number++; 154 scanner->line++;
98 scanner->col_number = 1; 155 scanner->col = 0;
99 } else { 156 } else {
100 scanner->col_number++; 157 scanner->col++;
101 } 158 }
102 scanner->offset++;
103 return c; 159 return c;
104} 160}
105 161
106void 162bool
107scan_rewind(Scanner *scanner) { 163scan_has_next(Scanner *scanner) {
108 sv_rewind(&scanner->current); 164 return scanner->str.size;
109 scanner->offset--;
110} 165}
111 166
112char 167char
113scan_peek(const Scanner *scanner) { 168scan_peek(Scanner *scanner) {
114 return sv_peek(&scanner->current); 169 return str_peek(scanner->str);
115} 170}
116 171
117bool 172void
118scan_has_next(const Scanner *scanner) { 173scan_skip_line(Scanner *scanner) {
119 return scanner->current.n != 0; 174 SearchResult newline = array_find_next(scanner->str, cstr("\n"));
175 if (newline.found) {
176 scanner->str.mem += newline.pos + 1;
177 scanner->str.size -= newline.pos + 1;
178 scanner->line++;
179 scanner->col = 0;
180 }
120} 181}
121 182
122void 183void
123skip_whitespace(Scanner *scanner) { 184scan_skip_whitespace(Scanner *scanner) {
124 while (scan_has_next(scanner)) { 185 while (scan_has_next(scanner)) {
125 char c = scan_peek(scanner); 186 char c = scan_peek(scanner);
126 switch (c) { 187 switch (c) {
127 case ' ': 188 case ' ':
189 case ',': // Commas are just syntactic sugar.
128 case '\f': 190 case '\f':
129 case '\n': 191 case '\n':
130 case '\r': 192 case '\r':
@@ -132,6 +194,10 @@ skip_whitespace(Scanner *scanner) {
132 case '\v': { 194 case '\v': {
133 scan_next(scanner); 195 scan_next(scanner);
134 } break; 196 } break;
197 case ';': {
198 // Found a comment! (skip)
199 scan_skip_line(scanner);
200 } break;
135 default: { 201 default: {
136 return; 202 return;
137 } break; 203 } break;
@@ -140,22 +206,33 @@ skip_whitespace(Scanner *scanner) {
140} 206}
141 207
142bool 208bool
143is_delimiter(char c) { 209scan_is_valid_split(char c) {
144 switch (c) { 210 switch (c) {
145 case EOF:
146 case '\0':
147 case ';': 211 case ';':
148 case '"':
149 case '\'':
150 case '(': 212 case '(':
151 case ')': 213 case ')':
152 case '[': 214 case '[':
153 case ']': 215 case ']':
154 case '{': 216 case '{':
155 case '}': 217 case '}':
218 case '+':
219 case '-':
220 case '*':
221 case '/':
222 case '%':
223 case '!':
224 case '=':
225 case '<':
226 case '>':
227 case '~':
228 case '&':
229 case '|':
156 case ':': 230 case ':':
231 case '.':
157 case '@': 232 case '@':
233 case '"':
158 case ' ': 234 case ' ':
235 case ',':
159 case '\f': 236 case '\f':
160 case '\n': 237 case '\n':
161 case '\r': 238 case '\r':
@@ -167,122 +244,351 @@ is_delimiter(char c) {
167 return false; 244 return false;
168} 245}
169 246
170TokenType 247void
171find_token_type(const StringView value) { 248scan_skip_until_valid(Scanner *scanner) {
172 for (size_t i = 0; i < sizeof(keywords) / sizeof(Keyword); i++) { 249 while (scan_has_next(scanner)) {
173 StringView keyword = (StringView){keywords[i].str, keywords[i].n}; 250 char c = scan_peek(scanner);
174 if (sv_equal(&value, &keyword)) { 251 if (scan_is_valid_split(c)) {
175 return keywords[i].token; 252 return;
176 } 253 }
254 scan_next(scanner);
177 } 255 }
178 return TOKEN_SYMBOL;
179} 256}
180 257
181void 258Token
182print_tokens(Token *tokens) { 259emit_token(Scanner current, Scanner *scanner, TokenType t) {
183 for (size_t i = 0; i < array_size(tokens); i++) { 260 Str val = current.str;
184 print_token(tokens[i]); 261 val.size = current.str.size - scanner->str.size;
185 } 262 val.size = val.size < 0 ? 0 : val.size;
263 return (Token){
264 .val = val,
265 .line = current.line + 1,
266 .col = current.col + 1,
267 .type = t,
268 };
186} 269}
187 270
188Token * 271Token
189tokenize(const StringView *sv) { 272emit_token_err(Scanner *scanner, Str err_msg) {
190 Token *tokens = NULL; 273 return (Token){
191 array_init(tokens, 1); 274 .line = scanner->line + 1,
192 Scanner scanner = (Scanner){ 275 .col = scanner->col + 1,
193 .current = *sv, 276 .val = err_msg,
194 .line_number = 1, 277 .type = TOK_UNKNOWN,
195 .col_number = 1,
196 }; 278 };
279}
197 280
198 while (scan_has_next(&scanner)) { 281Token
199 skip_whitespace(&scanner); 282emit_token_number(Scanner *scanner) {
200 size_t line = scanner.line_number; 283 Scanner current = *scanner;
201 size_t col = scanner.col_number; 284 char c = scan_peek(scanner);
202 size_t offset = scanner.offset; 285 if (c == '+' || c == '-') {
203 Token token = (Token){ 286 scan_next(scanner);
204 .type = TOKEN_UNKNOWN, 287 if (str_has_prefix(scanner->str, cstr("0b")) ||
205 .line = line, 288 str_has_prefix(scanner->str, cstr("0x"))) {
206 .col = col, 289 scan_skip_until_valid(scanner);
207 }; 290 return emit_token_err(
208 char c = scan_next(&scanner); 291 &current,
209 switch (c) { 292 cstr("malformed number: binary/hex numbers can't be signed"));
210 case ';': { 293 }
211 while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} 294 }
295 if (str_has_prefix(scanner->str, cstr("0b"))) {
296 scan_next(scanner);
297 scan_next(scanner);
298 while (scan_has_next(scanner)) {
299 c = scan_peek(scanner);
300 if (c == '0' || c == '1' || c == '_') {
301 scan_next(scanner);
212 continue; 302 continue;
213 } break; 303 }
214 case '"': { 304 if (scan_is_valid_split(c)) {
215 char prev = c; 305 return emit_token(current, scanner, TOK_NUMBER);
216 bool found = false; 306 }
217 size_t n = 0; 307 scan_skip_until_valid(scanner);
218 while (scan_has_next(&scanner)) { 308 return emit_token_err(
219 c = scan_next(&scanner); 309 &current, cstr("malformed number: invalid binary number"));
220 if (c == '"' && prev != '\\') { 310 }
221 found = true; 311 } else if (str_has_prefix(scanner->str, cstr("0x"))) {
222 break; 312 scan_next(scanner);
223 } 313 scan_next(scanner);
224 prev = c; 314 while (scan_has_next(scanner)) {
225 n++; 315 c = scan_peek(scanner);
226 } 316 if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
227 if (!found) { 317 (c >= 'A' && c <= 'F') || c == '_') {
228 push_error(ERR_TYPE_LEXER, ERR_UNMATCHED_STRING, line, col); 318 scan_next(scanner);
229 return tokens; 319 continue;
230 } 320 }
231 token.value = (StringView){ 321 if (scan_is_valid_split(c)) {
232 .start = &sv->start[offset + 1], 322 return emit_token(current, scanner, TOK_NUMBER);
233 .n = n, 323 }
234 }; 324 scan_skip_until_valid(scanner);
235 token.type = TOKEN_STRING; 325 return emit_token_err(&current,
236 } break; 326 cstr("malformed number: invalid hex number"));
237 case '(': { token.type = TOKEN_LPAREN; } break; 327 }
238 case ')': { token.type = TOKEN_RPAREN; } break; 328 } else {
239 case '[': { token.type = TOKEN_LSQUARE; } break; 329 // Integral.
240 case ']': { token.type = TOKEN_RSQUARE; } break; 330 while (scan_has_next(scanner)) {
241 case '{': { token.type = TOKEN_LCURLY; } break; 331 c = scan_peek(scanner);
242 case '}': { token.type = TOKEN_RCURLY; } break; 332 if (c == '.') {
243 case ':': { token.type = TOKEN_COLON; } break; 333 scan_next(scanner);
244 case '.': { token.type = TOKEN_DOT; } break; 334 break;
245 case '@': { token.type = TOKEN_AT; } break; 335 }
246 default: { 336 if ((c >= '0' && c <= '9') || c == '_') {
247 if (c == EOF || c == '\0') { 337 scan_next(scanner);
248 token.type = TOKEN_EOF; 338 continue;
249 break; 339 }
250 } 340 if (scan_is_valid_split(c)) {
251 size_t n = 1; 341 return emit_token(current, scanner, TOK_NUMBER);
252 bool num = c == '-' && !is_delimiter(scan_peek(&scanner)); 342 }
253 num = num || (c == '+' && !is_delimiter(scan_peek(&scanner))); 343 scan_skip_until_valid(scanner);
254 num = num || (c >= '0' && c <= '9'); 344 return emit_token_err(&current, cstr("malformed number"));
255 if (num) { 345 }
256 while (!is_delimiter(scan_peek(&scanner))) { 346 c = scan_peek(scanner);
257 c = scan_next(&scanner); 347 if (!(c >= '0' && c <= '9')) {
258 n++; 348 return emit_token_err(&current,
259 } 349 cstr("malformed number: no decimal digits"));
260 token.value = (StringView){ 350 }
261 .start = &sv->start[offset], 351 // Decimals.
262 .n = n, 352 while (scan_has_next(scanner)) {
263 }; 353 c = scan_peek(scanner);
264 token.type = TOKEN_NUMBER; 354 if (c == 'e' || c == 'E') {
265 } else { 355 scan_next(scanner);
266 while (!is_delimiter(scan_peek(&scanner))) { 356 break;
267 if (scan_peek(&scanner) == '.') { 357 }
268 break; 358 if ((c >= '0' && c <= '9') || c == '_') {
269 } 359 scan_next(scanner);
270 c = scan_next(&scanner); 360 continue;
271 n++; 361 }
272 } 362 if (scan_is_valid_split(c)) {
273 token.value = (StringView){ 363 return emit_token(current, scanner, TOK_NUMBER);
274 .start = &sv->start[offset], 364 }
275 .n = n, 365 scan_skip_until_valid(scanner);
276 }; 366 return emit_token_err(&current, cstr("malformed number"));
277 token.type = find_token_type(token.value);
278 }
279 } break;
280 } 367 }
281 if (token.type == TOKEN_UNKNOWN) { 368 // Exponent.
282 push_error(ERR_TYPE_LEXER, ERR_UNKNOWN_TOK_TYPE, line, col); 369 c = scan_peek(scanner);
283 return tokens; 370 if (c == '+' || c == '-') {
371 scan_next(scanner);
284 } 372 }
285 array_push(tokens, token); 373 while (scan_has_next(scanner)) {
374 c = scan_peek(scanner);
375 if ((c >= '0' && c <= '9') || c == '_') {
376 scan_next(scanner);
377 continue;
378 }
379 if (c == '.') {
380 scan_next(scanner);
381 return emit_token_err(
382 &current,
383 cstr("malformed number: decimals not allowed on exponent"));
384 }
385 if (scan_is_valid_split(c)) {
386 return emit_token(current, scanner, TOK_NUMBER);
387 }
388 scan_skip_until_valid(scanner);
389 return emit_token_err(&current, cstr("malformed number"));
390 }
391 }
392 return emit_token_err(&current, cstr("malformed number"));
393}
394
395Token
396scan_token(Scanner *scanner) {
397 assert(scanner);
398
399 scan_skip_whitespace(scanner);
400 if (!scan_has_next(scanner)) {
401 return emit_token(*scanner, scanner, TOK_EOF);
402 }
403
404 Scanner current = *scanner;
405 char c = scan_next(scanner);
406 switch (c) {
407 case '(':
408 return emit_token(current, scanner, TOK_LPAREN);
409 case ')':
410 return emit_token(current, scanner, TOK_RPAREN);
411 case '[':
412 return emit_token(current, scanner, TOK_LSQUARE);
413 case ']':
414 return emit_token(current, scanner, TOK_RSQUARE);
415 case '{':
416 return emit_token(current, scanner, TOK_LCURLY);
417 case '}':
418 return emit_token(current, scanner, TOK_RCURLY);
419 case '+': {
420 char p = scan_peek(scanner);
421 if (p >= '0' && p <= '9') {
422 *scanner = current;
423 return emit_token_number(scanner);
424 }
425 return emit_token(current, scanner, TOK_ADD);
426 };
427 case '-': {
428 char p = scan_peek(scanner);
429 if (p >= '0' && p <= '9') {
430 *scanner = current;
431 return emit_token_number(scanner);
432 }
433 return emit_token(current, scanner, TOK_ADD);
434 };
435 case '*':
436 return emit_token(current, scanner, TOK_MUL);
437 case '/':
438 return emit_token(current, scanner, TOK_DIV);
439 case '%':
440 return emit_token(current, scanner, TOK_MOD);
441 case '!': {
442 if (scan_peek(scanner) == '=') {
443 scan_next(scanner);
444 return emit_token(current, scanner, TOK_NOTEQ);
445 }
446 return emit_token(current, scanner, TOK_NOT);
447 };
448 case '=': {
449 if (scan_peek(scanner) == '=') {
450 scan_next(scanner);
451 return emit_token(current, scanner, TOK_EQ);
452 }
453 return emit_token(current, scanner, TOK_ASSIGN);
454 };
455 case '<': {
456 char p = scan_peek(scanner);
457 if (p == '=') {
458 scan_next(scanner);
459 return emit_token(current, scanner, TOK_LE);
460 }
461 if (p == '<') {
462 scan_next(scanner);
463 return emit_token(current, scanner, TOK_BITLSHIFT);
464 }
465 return emit_token(current, scanner, TOK_LT);
466 };
467 case '>': {
468 char p = scan_peek(scanner);
469 if (p == '=') {
470 scan_next(scanner);
471 return emit_token(current, scanner, TOK_GE);
472 }
473 if (p == '>') {
474 scan_next(scanner);
475 return emit_token(current, scanner, TOK_BITRSHIFT);
476 }
477 return emit_token(current, scanner, TOK_GT);
478 };
479 case '~':
480 return emit_token(current, scanner, TOK_BITNOT);
481 case '&': {
482 if (scan_peek(scanner) == '&') {
483 scan_next(scanner);
484 return emit_token(current, scanner, TOK_AND);
485 }
486 return emit_token(current, scanner, TOK_BITAND);
487 };
488 case '|': {
489 if (scan_peek(scanner) == '|') {
490 scan_next(scanner);
491 return emit_token(current, scanner, TOK_OR);
492 }
493 return emit_token(current, scanner, TOK_BITOR);
494 };
495 case ':':
496 return emit_token(current, scanner, TOK_COLON);
497 case '.':
498 return emit_token(current, scanner, TOK_DOT);
499 case '@':
500 return emit_token(current, scanner, TOK_AT);
501 case '"': {
502 while (scan_has_next(scanner)) {
503 c = scan_next(scanner);
504 if (c == '\\') {
505 scan_next(scanner);
506 continue;
507 }
508 if (c == '"') {
509 return emit_token(current, scanner, TOK_STRING);
510 }
511 }
512 return emit_token_err(&current, cstr("mismatched string quotes"));
513 };
514 }
515 if (c >= '0' && c <= '9') {
516 *scanner = current;
517 return emit_token_number(scanner);
518 }
519
520 scan_skip_until_valid(scanner);
521 Str val = current.str;
522 val.size = current.str.size - scanner->str.size;
523 val.size = val.size < 0 ? 0 : val.size;
524 if (val.size == 0) {
525 return emit_token_err(&current, cstr("unexpected character"));
526 }
527 switch (val.mem[0]) {
528 case 'b': {
529 if (str_has_prefix(val, cstr("break"))) {
530 return emit_token(current, scanner, TOK_BREAK);
531 }
532 } break;
533 case 'c': {
534 if (str_has_prefix(val, cstr("case"))) {
535 return emit_token(current, scanner, TOK_CASE);
536 }
537 if (str_has_prefix(val, cstr("continue"))) {
538 return emit_token(current, scanner, TOK_CONTINUE);
539 }
540 } break;
541 case 'f': {
542 if (str_has_prefix(val, cstr("false"))) {
543 return emit_token(current, scanner, TOK_FALSE);
544 }
545 if (str_has_prefix(val, cstr("fun"))) {
546 return emit_token(current, scanner, TOK_FUN);
547 }
548 } break;
549 case 'i': {
550 if (str_has_prefix(val, cstr("if"))) {
551 return emit_token(current, scanner, TOK_IF);
552 }
553 } break;
554 case 'l': {
555 if (str_has_prefix(val, cstr("let"))) {
556 return emit_token(current, scanner, TOK_LET);
557 }
558 } break;
559 case 'm': {
560 if (str_has_prefix(val, cstr("match"))) {
561 return emit_token(current, scanner, TOK_MATCH);
562 }
563 } break;
564 case 'n': {
565 if (str_has_prefix(val, cstr("nil"))) {
566 return emit_token(current, scanner, TOK_NIL);
567 }
568 } break;
569 case 'r': {
570 if (str_has_prefix(val, cstr("return"))) {
571 return emit_token(current, scanner, TOK_RETURN);
572 }
573 } break;
574 case 's': {
575 if (str_has_prefix(val, cstr("set"))) {
576 return emit_token(current, scanner, TOK_SET);
577 }
578 if (str_has_prefix(val, cstr("struct"))) {
579 return emit_token(current, scanner, TOK_STRUCT);
580 }
581 } break;
582 case 't': {
583 if (str_has_prefix(val, cstr("true"))) {
584 return emit_token(current, scanner, TOK_TRUE);
585 }
586 } break;
587 case 'w': {
588 if (str_has_prefix(val, cstr("while"))) {
589 return emit_token(current, scanner, TOK_WHILE);
590 }
591 } break;
286 } 592 }
287 return tokens; 593 return emit_token(current, scanner, TOK_SYMBOL);
288} 594}