diff options
Diffstat (limited to 'src/bootstrap/lexer.c')
-rw-r--r-- | src/bootstrap/lexer.c | 207 |
1 file changed, 0 insertions, 207 deletions
diff --git a/src/bootstrap/lexer.c b/src/bootstrap/lexer.c deleted file mode 100644 index b03db77..0000000 --- a/src/bootstrap/lexer.c +++ /dev/null | |||
@@ -1,207 +0,0 @@ | |||
// Categories a lexed token can belong to.
typedef enum TokenType {
    TOKEN_UNKNOWN = 0, // Not yet classified; placeholder before find_token_type runs.
    TOKEN_LPAREN,      // "("
    TOKEN_RPAREN,      // ")"
    TOKEN_FIXNUM,      // Integer literal, optionally with a leading '-'.
    TOKEN_SYMBOL,      // Any bare word that is not a number or boolean.
    TOKEN_BOOL,        // "true" or "false".
    TOKEN_STRING,      // Double-quoted string; value holds the contents only.
} TokenType;
10 | |||
// A single lexed token: its classification plus a non-owning view into the
// original input text. The input buffer must outlive the token.
typedef struct Token {
    TokenType type;
    StringView value; // Slice of the tokenized input; not NUL-terminated.
} Token;
15 | |||
// Result of tokenize(): a view over a sequence of tokens.
// NOTE(review): tokenize() returns a pointer into function-static storage,
// so a later tokenize() call invalidates a previous result — confirm callers
// never hold two results at once.
typedef struct Tokens {
    Token *start; // First token; NULL (zeroed struct) on lexing error.
    size_t n;     // Number of valid tokens at `start`.
} Tokens;
20 | |||
// Canonical token spellings as StringView compound literals.
// NOTE: each macro expansion creates a fresh unnamed StringView object; the
// length field must match the literal's strlen.
#define TRUE_TOKEN (StringView){"true", 4}
#define FALSE_TOKEN (StringView){"false", 5}
#define LPAREN_TOKEN (StringView){"(", 1}
#define RPAREN_TOKEN (StringView){")", 1}
25 | |||
26 | TokenType | ||
27 | find_token_type(StringView value) { | ||
28 | bool is_fixnum = true; | ||
29 | for (size_t i = 0; i < value.n; i++) { | ||
30 | char c = value.start[i]; | ||
31 | if (i == 0 && c == '-' && value.n > 1) { | ||
32 | continue; | ||
33 | } | ||
34 | if (!isdigit(c)) { | ||
35 | is_fixnum = false; | ||
36 | break; | ||
37 | } | ||
38 | } | ||
39 | if (is_fixnum) { | ||
40 | return TOKEN_FIXNUM; | ||
41 | } | ||
42 | |||
43 | if (sv_equal(value, TRUE_TOKEN) || sv_equal(value, FALSE_TOKEN)) { | ||
44 | return TOKEN_BOOL; | ||
45 | } | ||
46 | |||
47 | return TOKEN_SYMBOL; | ||
48 | } | ||
49 | |||
50 | Tokens | ||
51 | tokenize(StringView sv) { | ||
52 | // NOTE: Not allocating any memory for now, but we are limited by a maximum | ||
53 | // number of tokens we can process. | ||
54 | #define TOKENS_BUF_SIZE KB(64) | ||
55 | static Token tokens_buf[TOKENS_BUF_SIZE]; | ||
56 | |||
57 | // Clear buffer. | ||
58 | for (size_t i = 0; i < TOKENS_BUF_SIZE; i++) { | ||
59 | tokens_buf[i] = (Token){0}; | ||
60 | } | ||
61 | |||
62 | size_t n = 0; | ||
63 | size_t token_n = 0; | ||
64 | for (size_t i = 0; i < sv.n; i++) { | ||
65 | switch (sv.start[i]) { | ||
66 | case ' ': | ||
67 | case '\f': | ||
68 | case '\n': | ||
69 | case '\r': | ||
70 | case '\t': | ||
71 | case '\v': { | ||
72 | if (token_n != 0) { | ||
73 | Token token = (Token){ | ||
74 | .type = TOKEN_UNKNOWN, | ||
75 | .value = (StringView){ | ||
76 | .start = &sv.start[i - token_n], | ||
77 | .n = token_n, | ||
78 | } | ||
79 | }; | ||
80 | token.type = find_token_type(token.value); | ||
81 | tokens_buf[n++] = token; | ||
82 | token_n = 0; | ||
83 | } | ||
84 | } break; | ||
85 | case ';': { | ||
86 | if (token_n != 0) { | ||
87 | Token token = (Token){ | ||
88 | .type = TOKEN_UNKNOWN, | ||
89 | .value = (StringView){ | ||
90 | .start = &sv.start[i - token_n], | ||
91 | .n = token_n, | ||
92 | } | ||
93 | }; | ||
94 | token.type = find_token_type(token.value); | ||
95 | tokens_buf[n++] = token; | ||
96 | token_n = 0; | ||
97 | } | ||
98 | |||
99 | // Advance until the next newline. | ||
100 | do { | ||
101 | i++; | ||
102 | } while (i < sv.n && sv.start[(i + 1)] != '\n'); | ||
103 | } break; | ||
104 | case '"': { | ||
105 | if (token_n != 0) { | ||
106 | fprintf(stderr, "error: string started inside symbol\n"); | ||
107 | return (Tokens){0}; | ||
108 | } | ||
109 | |||
110 | // Find end delimiter. | ||
111 | size_t string_start = i + 1; | ||
112 | size_t string_end = i + 1; | ||
113 | while (true) { | ||
114 | if (sv.start[string_end] == '"' && sv.start[string_end - 1] != '\\') { | ||
115 | break; | ||
116 | } | ||
117 | if (string_end >= sv.n) { | ||
118 | fprintf(stderr, "error: string delimiter not found\n"); | ||
119 | return (Tokens){0}; | ||
120 | } | ||
121 | string_end++; | ||
122 | } | ||
123 | |||
124 | Token token = (Token){ | ||
125 | .type = TOKEN_STRING, | ||
126 | .value = (StringView){ | ||
127 | .start = &sv.start[string_start], | ||
128 | .n = string_end - string_start, | ||
129 | } | ||
130 | }; | ||
131 | tokens_buf[n++] = token; | ||
132 | token_n = 0; | ||
133 | i += string_end - string_start + 1; | ||
134 | } break; | ||
135 | case '(': { | ||
136 | if ((i + 1) < sv.n) { | ||
137 | char next_c = sv.start[i + 1]; | ||
138 | if (isspace(next_c)) { | ||
139 | fprintf(stderr, "error: lparen delimiter followed by space\n"); | ||
140 | return (Tokens){0}; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | if (token_n != 0) { | ||
145 | fprintf(stderr, "error: lparen delimiter within symbol name\n"); | ||
146 | return (Tokens){0}; | ||
147 | } | ||
148 | |||
149 | Token token = (Token){ | ||
150 | .type = TOKEN_LPAREN, | ||
151 | .value = LPAREN_TOKEN, | ||
152 | }; | ||
153 | tokens_buf[n++] = token; | ||
154 | } break; | ||
155 | case ')': { | ||
156 | if ((i + 1) < sv.n) { | ||
157 | char next_c = sv.start[i + 1]; | ||
158 | if ((next_c != ')' && !isspace(next_c))) { | ||
159 | fprintf(stderr, "error: rparen delimiter within symbol name\n"); | ||
160 | return (Tokens){0}; | ||
161 | } | ||
162 | } | ||
163 | |||
164 | if (token_n != 0) { | ||
165 | // Push previous token. | ||
166 | Token token = (Token){ | ||
167 | .type = TOKEN_UNKNOWN, | ||
168 | .value = (StringView){ | ||
169 | .start = &sv.start[i - token_n], | ||
170 | .n = token_n, | ||
171 | } | ||
172 | }; | ||
173 | token.type = find_token_type(token.value); | ||
174 | tokens_buf[n++] = token; | ||
175 | token_n = 0; | ||
176 | } | ||
177 | |||
178 | Token token = (Token){ | ||
179 | .type = TOKEN_RPAREN, | ||
180 | .value = RPAREN_TOKEN, | ||
181 | }; | ||
182 | tokens_buf[n++] = token; | ||
183 | } break; | ||
184 | case EOF: { | ||
185 | break; | ||
186 | } break; | ||
187 | default: { | ||
188 | token_n++; | ||
189 | } break; | ||
190 | } | ||
191 | } | ||
192 | if (token_n != 0) { | ||
193 | // End of line encountered. | ||
194 | Token token = (Token){ | ||
195 | .type = TOKEN_UNKNOWN, | ||
196 | .value = (StringView){ | ||
197 | .start = &sv.start[sv.n - token_n], | ||
198 | .n = token_n, | ||
199 | } | ||
200 | }; | ||
201 | token.type = find_token_type(token.value); | ||
202 | tokens_buf[n++] = token; | ||
203 | } | ||
204 | |||
205 | return (Tokens){.start = (Token *)&tokens_buf, .n = n}; | ||
206 | } | ||
207 | |||