aboutsummaryrefslogtreecommitdiffstats
path: root/src/bootstrap/lexer.c
diff options
context:
space:
mode:
author Bad Diode <bd@badd10de.dev> 2021-10-22 09:59:31 +0200
committer Bad Diode <bd@badd10de.dev> 2021-10-22 09:59:31 +0200
commit eeff5e273f22aa28e81ab080e9ffdce85ac394b8 (patch)
tree 71d11c76be7c0bb649099bb55e6181f9b7c6c8a8 /src/bootstrap/lexer.c
parent 5bd694fc7071bfb76b9f65c89d253b2b4e18cf63 (diff)
download bdl-eeff5e273f22aa28e81ab080e9ffdce85ac394b8.tar.gz
bdl-eeff5e273f22aa28e81ab080e9ffdce85ac394b8.zip
Prepare skeleton for bytecode interpreter
Diffstat (limited to 'src/bootstrap/lexer.c')
-rw-r--r--src/bootstrap/lexer.c257
1 files changed, 0 insertions, 257 deletions
diff --git a/src/bootstrap/lexer.c b/src/bootstrap/lexer.c
deleted file mode 100644
index 38ca37c..0000000
--- a/src/bootstrap/lexer.c
+++ /dev/null
@@ -1,257 +0,0 @@
1#include "lexer.h"
2
3void
4print_token(Token tok) {
5 printf("LINE: %3ld COL: %3ld ", tok.line, tok.column);
6 switch (tok.type) {
7 case TOKEN_LPAREN: {
8 printf("TOKEN_LPAREN");
9 } break;
10 case TOKEN_RPAREN: {
11 printf("TOKEN_RPAREN");
12 } break;
13 case TOKEN_QUOTE: {
14 printf("TOKEN_QUOTE");
15 } break;
16 case TOKEN_TRUE: {
17 printf("TOKEN_TRUE");
18 } break;
19 case TOKEN_FALSE: {
20 printf("TOKEN_FALSE");
21 } break;
22 case TOKEN_NIL: {
23 printf("TOKEN_NIL");
24 } break;
25 case TOKEN_FIXNUM: {
26 printf("TOKEN_FIXNUM -> ");
27 sv_write(&tok.value, stdout);
28 } break;
29 case TOKEN_SYMBOL: {
30 printf("TOKEN_SYMBOL -> ");
31 sv_write(&tok.value, stdout);
32 } break;
33 case TOKEN_STRING: {
34 printf("TOKEN_STRING -> ");
35 sv_write(&tok.value, stdout);
36 } break;
37 case TOKEN_EOF: {
38 printf("TOKEN_EOF");
39 } break;
40 case TOKEN_UNKNOWN: {
41 printf("TOKEN_UNKNOWN");
42 } break;
43 }
44 printf("\n");
45}
46
47char
48scan_next(Scanner *scanner) {
49 char c = sv_next(&scanner->current);
50 if (c == '\n') {
51 scanner->line_number++;
52 scanner->col_number = 1;
53 } else {
54 scanner->col_number++;
55 }
56 scanner->offset++;
57 return c;
58}
59
60char
61scan_peek(const Scanner *scanner) {
62 return sv_peek(&scanner->current);
63}
64
65bool
66scan_has_next(const Scanner *scanner) {
67 return scanner->current.n != 0;
68}
69
70void
71skip_whitespace(Scanner *scanner) {
72 while (scan_has_next(scanner)) {
73 char c = scan_peek(scanner);
74 switch (c) {
75 case ' ':
76 case '\f':
77 case '\n':
78 case '\r':
79 case '\t':
80 case '\v': {
81 scan_next(scanner);
82 } break;
83 default: {
84 return;
85 } break;
86 }
87 }
88}
89
// Return true when `c` ends a symbol/number lexeme: end-of-input markers
// (EOF, NUL), the comment starter ';', the string quote '"', the quote
// character '\'', either parenthesis, or any whitespace character.
//
// `c` is promoted to int for the comparison against EOF, exactly as it would
// be in a switch statement.
bool
is_delimiter(char c) {
    const bool end_marker = c == EOF || c == '\0';
    const bool punctuation = c == ';' || c == '"' || c == '\''
        || c == '(' || c == ')';
    const bool blank = c == ' ' || c == '\f' || c == '\n'
        || c == '\r' || c == '\t' || c == '\v';
    return end_marker || punctuation || blank;
}
111
112TokenType
113find_primitive_type(const StringView value) {
114 bool is_fixnum = true;
115 for (size_t i = 0; i < value.n; i++) {
116 char c = value.start[i];
117 if (i == 0 && c == '-' && value.n > 1) {
118 continue;
119 }
120 if (!(c >= '0' && c <= '9')) {
121 is_fixnum = false;
122 break;
123 }
124 }
125 if (is_fixnum) {
126 return TOKEN_FIXNUM;
127 }
128 if (sv_equal(&value, &(StringView){"true", 4})) {
129 return TOKEN_TRUE;
130 }
131 if (sv_equal(&value, &(StringView){"false", 5})) {
132 return TOKEN_FALSE;
133 }
134 return TOKEN_SYMBOL;
135}
136
137Token *
138tokenize(const StringView *sv) {
139 Token *tokens = NULL;
140 array_init(tokens, 1);
141 Scanner scanner = (Scanner){
142 .current = *sv,
143 .line_number = 1,
144 .col_number = 1,
145 };
146
147 while (scan_has_next(&scanner)) {
148 skip_whitespace(&scanner);
149 size_t line = scanner.line_number;
150 size_t col = scanner.col_number;
151 size_t offset = scanner.offset;
152 char c = scan_next(&scanner);
153 switch (c) {
154 case ';': {
155 while ((c = scan_next(&scanner)) != '\n' && c != '\0') {}
156 } break;
157 case '"': {
158 char prev = c;
159 bool found = false;
160 size_t n = 0;
161 while (scan_has_next(&scanner)) {
162 c = scan_next(&scanner);
163 if (c == '"' && prev != '\\') {
164 found = true;
165 break;
166 }
167 prev = c;
168 n++;
169 }
170 if (!found) {
171 error_push((Error){
172 .type = ERR_TYPE_LEXER,
173 .value = ERR_UNMATCHED_STRING,
174 .line = line,
175 .col = col,
176 });
177 return tokens;
178 }
179 Token token = (Token){
180 .value = (StringView){
181 .start = &sv->start[offset + 1],
182 .n = n,
183 },
184 .type = TOKEN_STRING,
185 .line = line,
186 .column = col,
187 };
188 array_push(tokens, token);
189 } break;
190 case '\'': {
191 Token token = (Token){
192 .type = TOKEN_QUOTE,
193 .line = line,
194 .column = col,
195 };
196 array_push(tokens, token);
197 } break;
198 case '(': {
199 if (scan_peek(&scanner) == ')') {
200 scan_next(&scanner);
201 Token token = (Token){
202 .type = TOKEN_NIL,
203 .line = line,
204 .column = col,
205 };
206 array_push(tokens, token);
207 } else {
208 Token token = (Token){
209 .type = TOKEN_LPAREN,
210 .line = line,
211 .column = col,
212 };
213 array_push(tokens, token);
214 }
215 } break;
216 case ')': {
217 Token token = (Token){
218 .type = TOKEN_RPAREN,
219 .line = line,
220 .column = col,
221 };
222 array_push(tokens, token);
223 } break;
224 default: {
225 size_t n = 1;
226 while (!is_delimiter(scan_peek(&scanner))) {
227 scan_next(&scanner);
228 n++;
229 }
230 if (c == EOF || c == '\0') {
231 break;
232 }
233 Token token = (Token){
234 .value = (StringView){
235 .start = &sv->start[offset],
236 .n = n,
237 },
238 .type = TOKEN_SYMBOL,
239 .line = line,
240 .column = col,
241 };
242 token.type = find_primitive_type(token.value);
243 array_push(tokens, token);
244 } break;
245 }
246 }
247
248 // Push EOF token.
249 Token token = (Token){
250 .type = TOKEN_EOF,
251 .line = scanner.line_number,
252 .column = 1,
253 };
254 array_push(tokens, token);
255
256 return tokens;
257}