diff options
author | Bad Diode <bd@badd10de.dev> | 2024-06-15 13:00:59 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2024-06-15 13:00:59 +0200 |
commit | 805efd71e0f5b10a6e78da08565407ec0a3649fe (patch) | |
tree | 895d44056643337aafad451749790ba438732428 /src | |
parent | 4c92dafaea614d50903d5adc61c069b21d42a9cf (diff) | |
download | bdl-805efd71e0f5b10a6e78da08565407ec0a3649fe.tar.gz bdl-805efd71e0f5b10a6e78da08565407ec0a3649fe.zip |
Add initial scanner functions
Diffstat (limited to 'src')
-rw-r--r-- | src/badlib.h | 20 | ||||
-rw-r--r-- | src/main.c | 296 |
2 files changed, 277 insertions, 39 deletions
diff --git a/src/badlib.h b/src/badlib.h index 9bab563..a91fcf8 100644 --- a/src/badlib.h +++ b/src/badlib.h | |||
@@ -571,6 +571,9 @@ str_next(Str *s) { | |||
571 | char | 571 | char |
572 | str_peek(Str s) { | 572 | str_peek(Str s) { |
573 | assert(s.mem); | 573 | assert(s.mem); |
574 | if (s.size == 0) { | ||
575 | return EOF; | ||
576 | } | ||
574 | return *s.mem; | 577 | return *s.mem; |
575 | } | 578 | } |
576 | 579 | ||
@@ -936,7 +939,6 @@ typedef enum { | |||
936 | FILE_ERR_OK = 0, | 939 | FILE_ERR_OK = 0, |
937 | FILE_ERR_CANT_OPEN, | 940 | FILE_ERR_CANT_OPEN, |
938 | FILE_ERR_READ_ERR, | 941 | FILE_ERR_READ_ERR, |
939 | FILE_ERR_PATH_TOO_BIG, | ||
940 | FILE_ERR_EMPTY, | 942 | FILE_ERR_EMPTY, |
941 | FILE_ERR_NUM, | 943 | FILE_ERR_NUM, |
942 | } FileErr; | 944 | } FileErr; |
@@ -945,8 +947,7 @@ Str file_err_str[FILE_ERR_NUM] = { | |||
945 | cstr(""), | 947 | cstr(""), |
946 | cstr("couldn't open file"), | 948 | cstr("couldn't open file"), |
947 | cstr("couldn't read file"), | 949 | cstr("couldn't read file"), |
948 | cstr("file path too big"), | 950 | cstr("empty file"), |
949 | cstr("file is empty"), | ||
950 | }; | 951 | }; |
951 | 952 | ||
952 | typedef struct FileContents { | 953 | typedef struct FileContents { |
@@ -958,19 +959,11 @@ typedef struct FileContents { | |||
958 | FileContents | 959 | FileContents |
959 | platform_read_file(Str path, Arena *a) { | 960 | platform_read_file(Str path, Arena *a) { |
960 | // Transform Str to cstr. | 961 | // Transform Str to cstr. |
961 | char path_str[KB(1)]; | 962 | Str path_str = str_concat(path, cstr("\0"), a); |
962 | if (path.size >= KB(1) - 1) { | ||
963 | return (FileContents){ | ||
964 | .path = path, | ||
965 | .err = FILE_ERR_PATH_TOO_BIG, | ||
966 | }; | ||
967 | } | ||
968 | memcpy(path_str, path.mem, path.size); | ||
969 | path_str[path.size] = 0; | ||
970 | 963 | ||
971 | // Read the entire file into memory. | 964 | // Read the entire file into memory. |
972 | sz file_size = 0; | 965 | sz file_size = 0; |
973 | FILE *fp = fopen(path_str, "rb+"); | 966 | FILE *fp = fopen((char*)path_str.mem, "rb+"); |
974 | if (!fp) { | 967 | if (!fp) { |
975 | return (FileContents){ | 968 | return (FileContents){ |
976 | .path = path, | 969 | .path = path, |
@@ -989,7 +982,6 @@ platform_read_file(Str path, Arena *a) { | |||
989 | .err = FILE_ERR_EMPTY, | 982 | .err = FILE_ERR_EMPTY, |
990 | }; | 983 | }; |
991 | } | 984 | } |
992 | |||
993 | return (FileContents){ | 985 | return (FileContents){ |
994 | .path = path, | 986 | .path = path, |
995 | .data = (Array){.mem = memory, .size = file_size}, | 987 | .data = (Array){.mem = memory, .size = file_size}, |
@@ -21,31 +21,275 @@ init(void) { | |||
21 | log_init_default(); | 21 | log_init_default(); |
22 | } | 22 | } |
23 | 23 | ||
// All token categories the scanner can hand out.
//
// TOK_UNKNOWN is pinned to zero, so a zero-initialized Token carries it; it is
// also the type used for error tokens (see emit_token_err). The enumerator
// order is significant: token_str is indexed by these values.
typedef enum TokenType {
    TOK_UNKNOWN = 0,

    // Paired delimiters: round, square and curly.
    TOK_LPAREN,
    TOK_RPAREN,
    TOK_LSQUARE,
    TOK_RSQUARE,
    TOK_LCURLY,
    TOK_RCURLY,

    // Literal values.
    TOK_NUMBER,
    TOK_SYMBOL,
    TOK_STRING,
    TOK_NIL,
    TOK_TRUE,
    TOK_FALSE,

    // Reserved words.
    TOK_LET,
    TOK_SET,
    TOK_FUN,
    TOK_STRUCT,
    TOK_IF,
    TOK_MATCH,
    TOK_CASE,
    TOK_WHILE,
    TOK_CONTINUE,
    TOK_BREAK,
    TOK_RETURN,

    // Arithmetic operators.
    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,

    // Logical and comparison operators.
    TOK_NOT,
    TOK_AND,
    TOK_OR,
    TOK_EQ,
    TOK_LT,
    TOK_GT,
    TOK_LE,
    TOK_GE,

    // Bitwise operators.
    TOK_BITNOT,
    TOK_BITAND,
    TOK_BITOR,
    TOK_BITLSHIFT,
    TOK_BITRSHIFT,

    // Miscellaneous operators.
    TOK_COLON,
    TOK_DOT,
    TOK_AT,
    TOK_ASSIGN,

    // Marks the end of the input; scan_token returns it once nothing is left.
    TOK_EOF,
} TokenType;
89 | |||
90 | Str token_str[] = { | ||
91 | [TOK_UNKNOWN] = cstr("UNKNOWN"), | ||
92 | |||
93 | // Parentheses. | ||
94 | [TOK_LPAREN] = cstr("LPAREN"), | ||
95 | [TOK_RPAREN] = cstr("RPAREN"), | ||
96 | [TOK_LSQUARE] = cstr("LSQUARE"), | ||
97 | [TOK_RSQUARE] = cstr("RSQUARE"), | ||
98 | [TOK_LCURLY] = cstr("LCURLY"), | ||
99 | [TOK_RCURLY] = cstr("RCURLY"), | ||
100 | |||
101 | // Basic literals. | ||
102 | [TOK_NUMBER] = cstr("NUMBER"), | ||
103 | [TOK_SYMBOL] = cstr("SYMBOL"), | ||
104 | [TOK_STRING] = cstr("STRING"), | ||
105 | [TOK_NIL] = cstr("NIL"), | ||
106 | [TOK_TRUE] = cstr("TRUE"), | ||
107 | [TOK_FALSE] = cstr("FALSE"), | ||
108 | |||
109 | // Keywords. | ||
110 | [TOK_LET] = cstr("LET"), | ||
111 | [TOK_SET] = cstr("SET"), | ||
112 | [TOK_FUN] = cstr("FUN"), | ||
113 | [TOK_STRUCT] = cstr("STRUCT"), | ||
114 | [TOK_IF] = cstr("IF"), | ||
115 | [TOK_MATCH] = cstr("MATCH"), | ||
116 | [TOK_CASE] = cstr("CASE"), | ||
117 | [TOK_WHILE] = cstr("WHILE"), | ||
118 | [TOK_CONTINUE] = cstr("CONTINUE"), | ||
119 | [TOK_BREAK] = cstr("BREAK"), | ||
120 | [TOK_RETURN] = cstr("RETURN"), | ||
121 | |||
122 | // Arithmetic ops. | ||
123 | [TOK_ADD] = cstr("ADD"), | ||
124 | [TOK_SUB] = cstr("SUB"), | ||
125 | [TOK_MUL] = cstr("MUL"), | ||
126 | [TOK_DIV] = cstr("DIV"), | ||
127 | [TOK_MOD] = cstr("MOD"), | ||
128 | |||
129 | // Boolean ops. | ||
130 | [TOK_NOT] = cstr("NOT"), | ||
131 | [TOK_AND] = cstr("AND"), | ||
132 | [TOK_OR] = cstr("OR"), | ||
133 | [TOK_EQ] = cstr("EQ"), | ||
134 | [TOK_LT] = cstr("LT"), | ||
135 | [TOK_GT] = cstr("GT"), | ||
136 | [TOK_LE] = cstr("LE"), | ||
137 | [TOK_GE] = cstr("GE"), | ||
138 | |||
139 | // Bitwise ops. | ||
140 | [TOK_BITNOT] = cstr("BITNOT"), | ||
141 | [TOK_BITAND] = cstr("BITAND"), | ||
142 | [TOK_BITOR] = cstr("BITOR"), | ||
143 | [TOK_BITLSHIFT] = cstr("BITLSHIFT"), | ||
144 | [TOK_BITRSHIFT] = cstr("BITRSHIFT"), | ||
145 | |||
146 | // Special ops. | ||
147 | [TOK_COLON] = cstr("COLON"), | ||
148 | [TOK_DOT] = cstr("DOT"), | ||
149 | [TOK_AT] = cstr("AT"), | ||
150 | [TOK_ASSIGN] = cstr("ASSIGN"), | ||
151 | |||
152 | // End of file. | ||
153 | [TOK_EOF] = cstr("EOF"), | ||
154 | }; | ||
155 | |||
156 | typedef struct Token { | ||
157 | TokenType type; | ||
158 | Str val; | ||
159 | sz line; | ||
160 | sz col; | ||
161 | } Token; | ||
162 | |||
163 | typedef struct Scanner { | ||
164 | Str str; | ||
165 | sz line; | ||
166 | sz col; | ||
167 | Arena *storage; | ||
168 | } Scanner; | ||
169 | |||
170 | char | ||
171 | scan_next(Scanner *scanner) { | ||
172 | char c = str_next(&scanner->str); | ||
173 | if (c == '\n') { | ||
174 | scanner->line++; | ||
175 | scanner->col = 0; | ||
176 | } else { | ||
177 | scanner->col++; | ||
178 | } | ||
179 | return c; | ||
180 | } | ||
181 | |||
182 | bool | ||
183 | scan_has_next(Scanner *scanner) { | ||
184 | return scanner->str.size; | ||
185 | } | ||
186 | |||
187 | char | ||
188 | scan_peek(Scanner *scanner) { | ||
189 | return str_peek(scanner->str); | ||
190 | } | ||
191 | |||
192 | Token | ||
193 | emit_token(Scanner *scanner, TokenType t) { | ||
194 | return (Token){ | ||
195 | .line = scanner->line + 1, | ||
196 | .col = scanner->col + 1, | ||
197 | .type = t, | ||
198 | }; | ||
199 | } | ||
200 | |||
201 | Token | ||
202 | emit_token_err(Scanner *scanner, Str err_msg) { | ||
203 | return (Token){ | ||
204 | .line = scanner->line + 1, | ||
205 | .col = scanner->col + 1, | ||
206 | .val = err_msg, | ||
207 | .type = TOK_UNKNOWN, | ||
208 | }; | ||
209 | } | ||
210 | |||
211 | void | ||
212 | scan_skip_line(Scanner *scanner) { | ||
213 | SearchResult newline = array_find_next(scanner->str, cstr("\n")); | ||
214 | if (newline.found) { | ||
215 | scanner->str.mem += newline.pos + 1; | ||
216 | scanner->str.size -= newline.pos + 1; | ||
217 | scanner->line++; | ||
218 | scanner->col = 0; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | void | ||
223 | scan_skip_whitespace(Scanner *scanner) { | ||
224 | while (scan_has_next(scanner)) { | ||
225 | char c = scan_peek(scanner); | ||
226 | switch (c) { | ||
227 | case ' ': | ||
228 | case ',': // We are currently considering commas as syntactic sugar. | ||
229 | case '\f': | ||
230 | case '\n': | ||
231 | case '\r': | ||
232 | case '\t': | ||
233 | case '\v': { | ||
234 | scan_next(scanner); | ||
235 | } break; | ||
236 | // Found a comment! (skip) | ||
237 | case ';': { | ||
238 | scan_skip_line(scanner); | ||
239 | } break; | ||
240 | default: { | ||
241 | return; | ||
242 | } break; | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | |||
247 | Token | ||
248 | scan_token(Scanner *scanner) { | ||
249 | assert(scanner); | ||
250 | |||
251 | scan_skip_whitespace(scanner); | ||
252 | if (!scan_has_next(scanner)) { | ||
253 | return emit_token(scanner, TOK_EOF); | ||
254 | } | ||
255 | |||
256 | Scanner current = *scanner; | ||
257 | char c = scan_next(scanner); | ||
258 | (void)c; | ||
259 | // TODO: rest of the operations... | ||
260 | |||
261 | // At this point we have an error, find the next newline. | ||
262 | scan_skip_line(scanner); | ||
263 | return emit_token_err(¤t, cstr("unexpected character")); | ||
264 | } | ||
265 | |||
24 | void | 266 | void |
25 | process_file(Str path) { | 267 | process_file(Str path) { |
26 | (void)path; | ||
27 | // println("processing: "); | ||
28 | // println("%s", path); | ||
29 | // println("initialing memory..."); | ||
30 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); | 268 | Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); |
31 | u8 *mem; | 269 | |
32 | mem = arena_malloc(16, &lexer_arena); | 270 | FileContents file = platform_read_file(path, &lexer_arena); |
33 | println("%{Arena}", &lexer_arena); | 271 | if (file.err) { |
34 | mem[0] = 255; | 272 | printf("file.err: %d\n", file.err); |
35 | mem[1] = 255; | 273 | eprintln("%s: error: %s", path, cstr("WOT")); |
36 | mem[2] = 0xba; | 274 | return; |
37 | mem[3] = 0xdd; | 275 | } |
38 | mem[4] = 0x10; | 276 | |
39 | mem[5] = 0xde; | 277 | Scanner scanner = { |
40 | mem[6] = 255; | 278 | .str = file.data, |
41 | mem[7] = 255; | 279 | .storage = &lexer_arena, |
42 | println("%{Arena}", &lexer_arena); | 280 | }; |
43 | arena_malloc(8, &lexer_arena); | 281 | Token tok = {0}; |
44 | mem = arena_realloc(mem, 16, 32, &lexer_arena); | 282 | while (tok.type != TOK_EOF) { |
45 | // arena_free(mem, 32, &lexer_arena); | 283 | tok = scan_token(&scanner); |
46 | println("%{Arena}", &lexer_arena); | 284 | println("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type], |
47 | // println("reading file..."); | 285 | tok.val); |
48 | // FileContents file = platform_read_file(path, &lexer_arena); | 286 | } |
287 | // while (true) { | ||
288 | // Token tok = scan_token(&scanner); | ||
289 | // println("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type], | ||
290 | // tok.val); | ||
291 | // if (tok.type == TOK_EOF) break; | ||
292 | // } | ||
49 | 293 | ||
50 | // Str scanner = file.data; | 294 | // Str scanner = file.data; |
51 | // // NOTE: Testing file read line by line. | 295 | // // NOTE: Testing file read line by line. |
@@ -57,10 +301,12 @@ process_file(Str path) { | |||
57 | // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345, | 301 | // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345, |
58 | // cstr("BOOM!"), &logger_inf.storage); | 302 | // cstr("BOOM!"), &logger_inf.storage); |
59 | 303 | ||
60 | println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); | 304 | // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); |
61 | eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string logger"), 'X'); | 305 | // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string |
62 | // while (true) {} | 306 | // logger"), 'X'); while (true) {} |
63 | // TODO: run lexer. | 307 | // TODO: run lexer. |
308 | // Free up resources. | ||
309 | arena_destroy(&lexer_arena, os_allocator); | ||
64 | } | 310 | } |
65 | 311 | ||
66 | #ifndef BIN_NAME | 312 | #ifndef BIN_NAME |