aboutsummaryrefslogtreecommitdiffstats
path: root/src/lexer.c
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-10-29 15:37:28 +0200
committerBad Diode <bd@badd10de.dev>2021-10-29 15:37:28 +0200
commite73a4c16a2269cdb2f5e7d66fb9839e4c44e14de (patch)
treec44721b005b7a0623e7acc7103ca8e21a25ff422 /src/lexer.c
parentfcc131afdd029c606ea39f3557bc3d33a075b1de (diff)
downloadbdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.tar.gz
bdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.zip
Prepare third compiler implementation
Diffstat (limited to 'src/lexer.c')
-rwxr-xr-xsrc/lexer.c244
1 files changed, 244 insertions, 0 deletions
diff --git a/src/lexer.c b/src/lexer.c
new file mode 100755
index 0000000..6a417e4
--- /dev/null
+++ b/src/lexer.c
@@ -0,0 +1,244 @@
1#include "lexer.h"
2#include "errors.h"
3
// Printable names for each TokenType, indexed by enum value.
// Used by print_token for debug output; must stay in sync with the
// TokenType enum declared in lexer.h.
static const char* token_str[] = {
    [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
    [TOKEN_LPAREN] = "TOKEN_LPAREN",
    [TOKEN_RPAREN] = "TOKEN_RPAREN",
    [TOKEN_FIXNUM] = "TOKEN_FIXNUM",
    [TOKEN_SYMBOL] = "TOKEN_SYMBOL",
    [TOKEN_STRING] = "TOKEN_STRING",
    [TOKEN_NIL] = "TOKEN_NIL",
    [TOKEN_TRUE] = "TOKEN_TRUE",
    [TOKEN_FALSE] = "TOKEN_FALSE",
    [TOKEN_EOF] = "TOKEN_EOF",
};
16
17void
18print_token(Token tok) {
19 printf("[%4ld:%-4ld] ", tok.line, tok.column);
20 printf("%s", token_str[tok.type]);
21 switch (tok.type) {
22 case TOKEN_FIXNUM: {
23 printf(" -> ");
24 sv_write(&tok.value);
25 } break;
26 case TOKEN_SYMBOL: {
27 printf(" -> ");
28 sv_write(&tok.value);
29 } break;
30 case TOKEN_STRING: {
31 printf(" -> ");
32 sv_write(&tok.value);
33 } break;
34 default: {
35 } break;
36 }
37 printf("\n");
38}
39
40char
41scan_next(Scanner *scanner) {
42 char c = sv_next(&scanner->current);
43 if (c == '\n') {
44 scanner->line_number++;
45 scanner->col_number = 1;
46 } else {
47 scanner->col_number++;
48 }
49 scanner->offset++;
50 return c;
51}
52
53char
54scan_peek(const Scanner *scanner) {
55 return sv_peek(&scanner->current);
56}
57
58bool
59scan_has_next(const Scanner *scanner) {
60 return scanner->current.n != 0;
61}
62
63void
64skip_whitespace(Scanner *scanner) {
65 while (scan_has_next(scanner)) {
66 char c = scan_peek(scanner);
67 switch (c) {
68 case ' ':
69 case '\f':
70 case '\n':
71 case '\r':
72 case '\t':
73 case '\v': {
74 scan_next(scanner);
75 } break;
76 default: {
77 return;
78 } break;
79 }
80 }
81}
82
// A delimiter terminates a bare word: end of input, the comment and quote
// introducers, parentheses, or any whitespace character.
bool
is_delimiter(char c) {
    if (c == EOF || c == '\0') {
        return true;
    }
    if (c == ';' || c == '"' || c == '\'' || c == '(' || c == ')') {
        return true;
    }
    return c == ' ' || c == '\f' || c == '\n' ||
           c == '\r' || c == '\t' || c == '\v';
}
104
// True when the StringView VAL exactly matches the string literal KEYWORD.
// sizeof(KEYWORD) - 1 drops the literal's terminating NUL from the length.
#define TOKEN_IS_KEYWORD(VAL, KEYWORD) \
    sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1})
107
108TokenType
109find_primitive_type(const StringView value) {
110 bool is_fixnum = true;
111 for (size_t i = 0; i < value.n; i++) {
112 char c = value.start[i];
113 if (i == 0 && c == '-' && value.n > 1) {
114 continue;
115 }
116 if (!(c >= '0' && c <= '9')) {
117 is_fixnum = false;
118 break;
119 }
120 }
121 if (is_fixnum) {
122 return TOKEN_FIXNUM;
123 }
124 if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; }
125 if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; }
126 if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; }
127
128 return TOKEN_SYMBOL;
129}
130
// Tokenize the whole input view into a heap-allocated Tokens list.
// Recognizes comments (';' to end of line), string literals with \" escapes,
// parentheses -- where "()" collapses directly into a single TOKEN_NIL --
// and bare words, which find_primitive_type classifies as fixnum, keyword,
// or symbol. On an unterminated string, an ERR_UNMATCHED_STRING error is
// pushed and the partial token list is returned immediately; otherwise a
// TOKEN_EOF token is always appended last. Token values alias the input
// buffer (no copies), so the caller must keep *sv alive while using them.
Tokens
tokenize(const StringView *sv) {
    Tokens tokens = {0};
    tokens.tokens = NULL;
    array_init(tokens.tokens, 1);
    Scanner scanner = (Scanner){
        .current = *sv,
        .line_number = 1,
        .col_number = 1,
    };

    while (scan_has_next(&scanner)) {
        skip_whitespace(&scanner);
        // Record the token's start position before consuming its first char.
        size_t line = scanner.line_number;
        size_t col = scanner.col_number;
        size_t offset = scanner.offset;
        // NOTE(review): if the input ends in whitespace, skip_whitespace
        // exhausts the scanner and this scan_next reads past the end --
        // presumably sv_next yields '\0' then, which the default case
        // discards below; confirm against the StringView implementation.
        char c = scan_next(&scanner);
        switch (c) {
            case ';': {
                // Comment: discard everything up to end of line (or input).
                while ((c = scan_next(&scanner)) != '\n' && c != '\0') {}
            } break;
            case '"': {
                // String literal: consume until an unescaped closing quote.
                char prev = c;
                bool found = false;
                // n counts the raw characters between the quotes; escape
                // sequences are kept verbatim, not decoded.
                size_t n = 0;
                while (scan_has_next(&scanner)) {
                    c = scan_next(&scanner);
                    if (c == '"' && prev != '\\') {
                        found = true;
                        break;
                    }
                    prev = c;
                    n++;
                }
                if (!found) {
                    // Input ran out before the closing quote: report the
                    // error at the opening quote's position and bail out.
                    error_push(&tokens.errors, (Error){
                        .type = ERR_TYPE_LEXER,
                        .value = ERR_UNMATCHED_STRING,
                        .line = line,
                        .col = col,
                    });
                    return tokens;
                }
                // The value aliases the source buffer; offset + 1 skips the
                // opening quote.
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset + 1],
                        .n = n,
                    },
                    .type = TOKEN_STRING,
                    .line = line,
                    .column = col,
                };
                array_push(tokens.tokens, token);
            } break;
            case '(': {
                // "()" is lexed directly as the nil literal; anything else
                // after '(' yields an ordinary left paren.
                if (scan_peek(&scanner) == ')') {
                    scan_next(&scanner);
                    Token token = (Token){
                        .type = TOKEN_NIL,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens.tokens, token);
                } else {
                    Token token = (Token){
                        .type = TOKEN_LPAREN,
                        .line = line,
                        .column = col,
                    };
                    array_push(tokens.tokens, token);
                }
            } break;
            case ')': {
                Token token = (Token){
                    .type = TOKEN_RPAREN,
                    .line = line,
                    .column = col,
                };
                array_push(tokens.tokens, token);
            } break;
            default: {
                // Bare word: extend until the next delimiter.
                size_t n = 1;
                while (!is_delimiter(scan_peek(&scanner))) {
                    scan_next(&scanner);
                    n++;
                }
                // A lone EOF/NUL consumed after trailing whitespace is not
                // a real token; drop it.
                if (c == EOF || c == '\0') {
                    break;
                }
                Token token = (Token){
                    .value = (StringView){
                        .start = &sv->start[offset],
                        .n = n,
                    },
                    .type = TOKEN_SYMBOL,
                    .line = line,
                    .column = col,
                };
                // Refine SYMBOL into FIXNUM/NIL/TRUE/FALSE where applicable.
                token.type = find_primitive_type(token.value);
                array_push(tokens.tokens, token);
            } break;
        }
    }

    // Push EOF token.
    Token token = (Token){
        .type = TOKEN_EOF,
        .line = scanner.line_number,
        .column = 1,
    };
    array_push(tokens.tokens, token);

    return tokens;
}