aboutsummaryrefslogtreecommitdiffstats
path: root/src/bootstrap/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bootstrap/lexer.c')
-rw-r--r--src/bootstrap/lexer.c207
1 files changed, 0 insertions, 207 deletions
diff --git a/src/bootstrap/lexer.c b/src/bootstrap/lexer.c
deleted file mode 100644
index b03db77..0000000
--- a/src/bootstrap/lexer.c
+++ /dev/null
@@ -1,207 +0,0 @@
// Lexical categories produced by the tokenizer.
typedef enum TokenType {
    TOKEN_UNKNOWN = 0, // Not yet classified; resolved via find_token_type().
    TOKEN_LPAREN,      // "("
    TOKEN_RPAREN,      // ")"
    TOKEN_FIXNUM,      // Integer literal, optionally with a leading '-'.
    TOKEN_SYMBOL,      // Bare word that is neither a number nor a boolean.
    TOKEN_BOOL,        // "true" or "false".
    TOKEN_STRING,      // Double-quoted string (quotes stripped from the value).
} TokenType;
10
11typedef struct Token {
12 TokenType type;
13 StringView value;
14} Token;
15
16typedef struct Tokens {
17 Token *start;
18 size_t n;
19} Tokens;
20
// Canonical token texts as StringView compound literals (pointer + length).
#define TRUE_TOKEN   (StringView){ .start = "true",  .n = 4 }
#define FALSE_TOKEN  (StringView){ .start = "false", .n = 5 }
#define LPAREN_TOKEN (StringView){ .start = "(",     .n = 1 }
#define RPAREN_TOKEN (StringView){ .start = ")",     .n = 1 }
25
26TokenType
27find_token_type(StringView value) {
28 bool is_fixnum = true;
29 for (size_t i = 0; i < value.n; i++) {
30 char c = value.start[i];
31 if (i == 0 && c == '-' && value.n > 1) {
32 continue;
33 }
34 if (!isdigit(c)) {
35 is_fixnum = false;
36 break;
37 }
38 }
39 if (is_fixnum) {
40 return TOKEN_FIXNUM;
41 }
42
43 if (sv_equal(value, TRUE_TOKEN) || sv_equal(value, FALSE_TOKEN)) {
44 return TOKEN_BOOL;
45 }
46
47 return TOKEN_SYMBOL;
48}
49
50Tokens
51tokenize(StringView sv) {
52 // NOTE: Not allocating any memory for now, but we are limited by a maximum
53 // number of tokens we can process.
54 #define TOKENS_BUF_SIZE KB(64)
55 static Token tokens_buf[TOKENS_BUF_SIZE];
56
57 // Clear buffer.
58 for (size_t i = 0; i < TOKENS_BUF_SIZE; i++) {
59 tokens_buf[i] = (Token){0};
60 }
61
62 size_t n = 0;
63 size_t token_n = 0;
64 for (size_t i = 0; i < sv.n; i++) {
65 switch (sv.start[i]) {
66 case ' ':
67 case '\f':
68 case '\n':
69 case '\r':
70 case '\t':
71 case '\v': {
72 if (token_n != 0) {
73 Token token = (Token){
74 .type = TOKEN_UNKNOWN,
75 .value = (StringView){
76 .start = &sv.start[i - token_n],
77 .n = token_n,
78 }
79 };
80 token.type = find_token_type(token.value);
81 tokens_buf[n++] = token;
82 token_n = 0;
83 }
84 } break;
85 case ';': {
86 if (token_n != 0) {
87 Token token = (Token){
88 .type = TOKEN_UNKNOWN,
89 .value = (StringView){
90 .start = &sv.start[i - token_n],
91 .n = token_n,
92 }
93 };
94 token.type = find_token_type(token.value);
95 tokens_buf[n++] = token;
96 token_n = 0;
97 }
98
99 // Advance until the next newline.
100 do {
101 i++;
102 } while (i < sv.n && sv.start[(i + 1)] != '\n');
103 } break;
104 case '"': {
105 if (token_n != 0) {
106 fprintf(stderr, "error: string started inside symbol\n");
107 return (Tokens){0};
108 }
109
110 // Find end delimiter.
111 size_t string_start = i + 1;
112 size_t string_end = i + 1;
113 while (true) {
114 if (sv.start[string_end] == '"' && sv.start[string_end - 1] != '\\') {
115 break;
116 }
117 if (string_end >= sv.n) {
118 fprintf(stderr, "error: string delimiter not found\n");
119 return (Tokens){0};
120 }
121 string_end++;
122 }
123
124 Token token = (Token){
125 .type = TOKEN_STRING,
126 .value = (StringView){
127 .start = &sv.start[string_start],
128 .n = string_end - string_start,
129 }
130 };
131 tokens_buf[n++] = token;
132 token_n = 0;
133 i += string_end - string_start + 1;
134 } break;
135 case '(': {
136 if ((i + 1) < sv.n) {
137 char next_c = sv.start[i + 1];
138 if (isspace(next_c)) {
139 fprintf(stderr, "error: lparen delimiter followed by space\n");
140 return (Tokens){0};
141 }
142 }
143
144 if (token_n != 0) {
145 fprintf(stderr, "error: lparen delimiter within symbol name\n");
146 return (Tokens){0};
147 }
148
149 Token token = (Token){
150 .type = TOKEN_LPAREN,
151 .value = LPAREN_TOKEN,
152 };
153 tokens_buf[n++] = token;
154 } break;
155 case ')': {
156 if ((i + 1) < sv.n) {
157 char next_c = sv.start[i + 1];
158 if ((next_c != ')' && !isspace(next_c))) {
159 fprintf(stderr, "error: rparen delimiter within symbol name\n");
160 return (Tokens){0};
161 }
162 }
163
164 if (token_n != 0) {
165 // Push previous token.
166 Token token = (Token){
167 .type = TOKEN_UNKNOWN,
168 .value = (StringView){
169 .start = &sv.start[i - token_n],
170 .n = token_n,
171 }
172 };
173 token.type = find_token_type(token.value);
174 tokens_buf[n++] = token;
175 token_n = 0;
176 }
177
178 Token token = (Token){
179 .type = TOKEN_RPAREN,
180 .value = RPAREN_TOKEN,
181 };
182 tokens_buf[n++] = token;
183 } break;
184 case EOF: {
185 break;
186 } break;
187 default: {
188 token_n++;
189 } break;
190 }
191 }
192 if (token_n != 0) {
193 // End of line encountered.
194 Token token = (Token){
195 .type = TOKEN_UNKNOWN,
196 .value = (StringView){
197 .start = &sv.start[sv.n - token_n],
198 .n = token_n,
199 }
200 };
201 token.type = find_token_type(token.value);
202 tokens_buf[n++] = token;
203 }
204
205 return (Tokens){.start = (Token *)&tokens_buf, .n = n};
206}
207