diff options
author | Bad Diode <bd@badd10de.dev> | 2021-10-29 15:37:28 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-10-29 15:37:28 +0200 |
commit | e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de (patch) | |
tree | c44721b005b7a0623e7acc7103ca8e21a25ff422 /src/lexer.c | |
parent | fcc131afdd029c606ea39f3557bc3d33a075b1de (diff) | |
download | bdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.tar.gz bdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.zip |
Prepare third compiler implementation
Diffstat (limited to 'src/lexer.c')
-rwxr-xr-x | src/lexer.c | 244 |
1 files changed, 244 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c new file mode 100755 index 0000000..6a417e4 --- /dev/null +++ b/src/lexer.c | |||
@@ -0,0 +1,244 @@ | |||
1 | #include "lexer.h" | ||
2 | #include "errors.h" | ||
3 | |||
// Human-readable names for each TokenType, used by print_token for
// debug output.  Designated initializers keep each string paired with
// its enum value even if the TokenType declaration order changes.
static const char* token_str[] = {
    [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
    [TOKEN_LPAREN]  = "TOKEN_LPAREN",
    [TOKEN_RPAREN]  = "TOKEN_RPAREN",
    [TOKEN_FIXNUM]  = "TOKEN_FIXNUM",
    [TOKEN_SYMBOL]  = "TOKEN_SYMBOL",
    [TOKEN_STRING]  = "TOKEN_STRING",
    [TOKEN_NIL]     = "TOKEN_NIL",
    [TOKEN_TRUE]    = "TOKEN_TRUE",
    [TOKEN_FALSE]   = "TOKEN_FALSE",
    [TOKEN_EOF]     = "TOKEN_EOF",
};
16 | |||
17 | void | ||
18 | print_token(Token tok) { | ||
19 | printf("[%4ld:%-4ld] ", tok.line, tok.column); | ||
20 | printf("%s", token_str[tok.type]); | ||
21 | switch (tok.type) { | ||
22 | case TOKEN_FIXNUM: { | ||
23 | printf(" -> "); | ||
24 | sv_write(&tok.value); | ||
25 | } break; | ||
26 | case TOKEN_SYMBOL: { | ||
27 | printf(" -> "); | ||
28 | sv_write(&tok.value); | ||
29 | } break; | ||
30 | case TOKEN_STRING: { | ||
31 | printf(" -> "); | ||
32 | sv_write(&tok.value); | ||
33 | } break; | ||
34 | default: { | ||
35 | } break; | ||
36 | } | ||
37 | printf("\n"); | ||
38 | } | ||
39 | |||
40 | char | ||
41 | scan_next(Scanner *scanner) { | ||
42 | char c = sv_next(&scanner->current); | ||
43 | if (c == '\n') { | ||
44 | scanner->line_number++; | ||
45 | scanner->col_number = 1; | ||
46 | } else { | ||
47 | scanner->col_number++; | ||
48 | } | ||
49 | scanner->offset++; | ||
50 | return c; | ||
51 | } | ||
52 | |||
53 | char | ||
54 | scan_peek(const Scanner *scanner) { | ||
55 | return sv_peek(&scanner->current); | ||
56 | } | ||
57 | |||
58 | bool | ||
59 | scan_has_next(const Scanner *scanner) { | ||
60 | return scanner->current.n != 0; | ||
61 | } | ||
62 | |||
63 | void | ||
64 | skip_whitespace(Scanner *scanner) { | ||
65 | while (scan_has_next(scanner)) { | ||
66 | char c = scan_peek(scanner); | ||
67 | switch (c) { | ||
68 | case ' ': | ||
69 | case '\f': | ||
70 | case '\n': | ||
71 | case '\r': | ||
72 | case '\t': | ||
73 | case '\v': { | ||
74 | scan_next(scanner); | ||
75 | } break; | ||
76 | default: { | ||
77 | return; | ||
78 | } break; | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | |||
// True if `c` terminates the token currently being scanned: any
// whitespace, a structural/reader character, or end of input.
bool
is_delimiter(char c) {
    // Whitespace always ends a token.
    if (c == ' '  || c == '\f' || c == '\n' ||
        c == '\r' || c == '\t' || c == '\v') {
        return true;
    }
    // End-of-input markers, comments, strings, quotes and parens.
    return c == EOF  || c == '\0' || c == ';' || c == '"' ||
           c == '\'' || c == '('  || c == ')';
}
104 | |||
// True if VAL (a StringView) exactly matches the string literal KEYWORD.
// `sizeof(KEYWORD) - 1` is the literal's length without the trailing NUL,
// so this only works with literals, not with char* variables.
#define TOKEN_IS_KEYWORD(VAL, KEYWORD) \
    sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1})
107 | |||
108 | TokenType | ||
109 | find_primitive_type(const StringView value) { | ||
110 | bool is_fixnum = true; | ||
111 | for (size_t i = 0; i < value.n; i++) { | ||
112 | char c = value.start[i]; | ||
113 | if (i == 0 && c == '-' && value.n > 1) { | ||
114 | continue; | ||
115 | } | ||
116 | if (!(c >= '0' && c <= '9')) { | ||
117 | is_fixnum = false; | ||
118 | break; | ||
119 | } | ||
120 | } | ||
121 | if (is_fixnum) { | ||
122 | return TOKEN_FIXNUM; | ||
123 | } | ||
124 | if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } | ||
125 | if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } | ||
126 | if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } | ||
127 | |||
128 | return TOKEN_SYMBOL; | ||
129 | } | ||
130 | |||
// Split the input into a flat token stream.
//
// Tokens hold StringViews into the caller's buffer (no copies), so `sv`
// must outlive the returned Tokens.  On an unterminated string literal
// a lexer error is recorded and the tokens gathered so far are returned
// immediately (without a trailing EOF token); on the success path a
// TOKEN_EOF token is always appended.
Tokens
tokenize(const StringView *sv) {
    Tokens tokens = {0};
    tokens.tokens = NULL;
    array_init(tokens.tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };

    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        // Record where this token starts, both for position reporting
        // and for slicing its text back out of the source buffer.
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        char c = scan_next(&scanner);
        switch (c) {
            case ';': {
                // Comment: discard everything up to the end of the line.
                // NOTE(review): assumes sv_next yields '\0' once input is
                // exhausted so a comment on the last line terminates --
                // confirm against the StringView implementation.
                while ((c = scan_next(&scanner)) != '\n' && c != '\0') {}
            } break;
            case '"': {
                // String literal: scan for the closing quote, honoring
                // backslash-escaped quotes via the one-char lookbehind.
                char prev = c;
                bool found = false;
                size_t n = 0;
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (c == '"' && prev != '\\') {
                        found = true;
                        break;
                    }
                    prev = c;
                    n++;
                }
                if (!found) {
                    // Input ended inside the literal: report the error at
                    // the opening quote's position and bail out.
                    error_push(&tokens.errors, (Error){
                        .type = ERR_TYPE_LEXER,
                        .value = ERR_UNMATCHED_STRING,
                        .line = line,
                        .col = col,
                    });
                    return tokens;
                }
                // The token value excludes both quotes: start one past
                // the opening '"', length n (characters between quotes).
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset + 1],
                        .n = n,
                    },
                    .type = TOKEN_STRING,
                    .line = line,
                    .column = col,
                };
                array_push(tokens.tokens, token);
            } break;
            case '(': {
                // "()" is lexed directly as the nil literal rather than
                // as an LPAREN/RPAREN pair.
                if (scan_peek(&scanner) == ')') {
                    scan_next(&scanner);
                    Token token = (Token){
                        .type = TOKEN_NIL,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens.tokens, token);
                } else {
                    Token token = (Token){
                        .type = TOKEN_LPAREN,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens.tokens, token);
                }
            } break;
            case ')': {
                Token token = (Token){
                    .type = TOKEN_RPAREN,
                    .line = line,
                    .column = col,
                };
                array_push(tokens.tokens, token);
            } break;
            default: {
                // Atom (number/keyword/symbol): consume characters until
                // the next delimiter.
                size_t n = 1;
                while (!is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                // Trailing whitespace before end of input leaves c as the
                // past-the-end '\0'/EOF read -- emit no token for it.
                if (c == EOF || c == '\0') {
                    break;
                }
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset],
                        .n = n,
                    },
                    .type = TOKEN_SYMBOL,
                    .line = line,
                    .column = col,
                };
                // Refine the default SYMBOL into FIXNUM/NIL/TRUE/FALSE
                // when the text matches.
                token.type = find_primitive_type(token.value);
                array_push(tokens.tokens, token);
            } break;
        }
    }

    // Push EOF token.
    Token token = (Token){
        .type = TOKEN_EOF,
        .line = scanner.line_number,
        .column = 1,
    };
    array_push(tokens.tokens, token);

    return tokens;
}