aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev> 2024-06-15 13:00:59 +0200
committer: Bad Diode <bd@badd10de.dev> 2024-06-15 13:00:59 +0200
commit: 805efd71e0f5b10a6e78da08565407ec0a3649fe (patch)
tree: 895d44056643337aafad451749790ba438732428 /src
parent: 4c92dafaea614d50903d5adc61c069b21d42a9cf (diff)
download: bdl-805efd71e0f5b10a6e78da08565407ec0a3649fe.tar.gz
bdl-805efd71e0f5b10a6e78da08565407ec0a3649fe.zip
Add initial scanner functions
Diffstat (limited to 'src')
-rw-r--r-- src/badlib.h 20
-rw-r--r-- src/main.c 296
2 files changed, 277 insertions, 39 deletions
diff --git a/src/badlib.h b/src/badlib.h
index 9bab563..a91fcf8 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -571,6 +571,9 @@ str_next(Str *s) {
571char 571char
572str_peek(Str s) { 572str_peek(Str s) {
573 assert(s.mem); 573 assert(s.mem);
574 if (s.size == 0) {
575 return EOF;
576 }
574 return *s.mem; 577 return *s.mem;
575} 578}
576 579
@@ -936,7 +939,6 @@ typedef enum {
936 FILE_ERR_OK = 0, 939 FILE_ERR_OK = 0,
937 FILE_ERR_CANT_OPEN, 940 FILE_ERR_CANT_OPEN,
938 FILE_ERR_READ_ERR, 941 FILE_ERR_READ_ERR,
939 FILE_ERR_PATH_TOO_BIG,
940 FILE_ERR_EMPTY, 942 FILE_ERR_EMPTY,
941 FILE_ERR_NUM, 943 FILE_ERR_NUM,
942} FileErr; 944} FileErr;
@@ -945,8 +947,7 @@ Str file_err_str[FILE_ERR_NUM] = {
945 cstr(""), 947 cstr(""),
946 cstr("couldn't open file"), 948 cstr("couldn't open file"),
947 cstr("couldn't read file"), 949 cstr("couldn't read file"),
948 cstr("file path too big"), 950 cstr("empty file"),
949 cstr("file is empty"),
950}; 951};
951 952
952typedef struct FileContents { 953typedef struct FileContents {
@@ -958,19 +959,11 @@ typedef struct FileContents {
958FileContents 959FileContents
959platform_read_file(Str path, Arena *a) { 960platform_read_file(Str path, Arena *a) {
960 // Transform Str to cstr. 961 // Transform Str to cstr.
961 char path_str[KB(1)]; 962 Str path_str = str_concat(path, cstr("\0"), a);
962 if (path.size >= KB(1) - 1) {
963 return (FileContents){
964 .path = path,
965 .err = FILE_ERR_PATH_TOO_BIG,
966 };
967 }
968 memcpy(path_str, path.mem, path.size);
969 path_str[path.size] = 0;
970 963
971 // Read the entire file into memory. 964 // Read the entire file into memory.
972 sz file_size = 0; 965 sz file_size = 0;
973 FILE *fp = fopen(path_str, "rb+"); 966 FILE *fp = fopen((char*)path_str.mem, "rb+");
974 if (!fp) { 967 if (!fp) {
975 return (FileContents){ 968 return (FileContents){
976 .path = path, 969 .path = path,
@@ -989,7 +982,6 @@ platform_read_file(Str path, Arena *a) {
989 .err = FILE_ERR_EMPTY, 982 .err = FILE_ERR_EMPTY,
990 }; 983 };
991 } 984 }
992
993 return (FileContents){ 985 return (FileContents){
994 .path = path, 986 .path = path,
995 .data = (Array){.mem = memory, .size = file_size}, 987 .data = (Array){.mem = memory, .size = file_size},
diff --git a/src/main.c b/src/main.c
index 2b51d2c..6be1e88 100644
--- a/src/main.c
+++ b/src/main.c
@@ -21,31 +21,275 @@ init(void) {
21 log_init_default(); 21 log_init_default();
22} 22}
23 23
// Every kind of token the scanner can produce.
//
// TOK_UNKNOWN is deliberately zero so that a zero-initialised Token starts
// out as "unknown"; it is also the type used for error tokens.
typedef enum TokenType {
    TOK_UNKNOWN = 0,

    // Parentheses.
    TOK_LPAREN,
    TOK_RPAREN,
    TOK_LSQUARE,
    TOK_RSQUARE,
    TOK_LCURLY,
    TOK_RCURLY,

    // Basic literals.
    TOK_NUMBER,
    TOK_SYMBOL,
    TOK_STRING,
    TOK_NIL,
    TOK_TRUE,
    TOK_FALSE,

    // Keywords.
    TOK_LET,
    TOK_SET,
    TOK_FUN,
    TOK_STRUCT,
    TOK_IF,
    TOK_MATCH,
    TOK_CASE,
    TOK_WHILE,
    TOK_CONTINUE,
    TOK_BREAK,
    TOK_RETURN,

    // Arithmetic ops.
    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,

    // Boolean ops.
    TOK_NOT,
    TOK_AND,
    TOK_OR,
    TOK_EQ,
    TOK_LT,
    TOK_GT,
    TOK_LE,
    TOK_GE,

    // Bitwise ops.
    TOK_BITNOT,
    TOK_BITAND,
    TOK_BITOR,
    TOK_BITLSHIFT,
    TOK_BITRSHIFT,

    // Special ops.
    TOK_COLON,
    TOK_DOT,
    TOK_AT,
    TOK_ASSIGN,

    // End of file.
    TOK_EOF,

    // Number of token types (not a real token). Must stay last; mirrors the
    // FILE_ERR_NUM convention used by the FileErr enum.
    TOK_NUM,
} TokenType;
89
90Str token_str[] = {
91 [TOK_UNKNOWN] = cstr("UNKNOWN"),
92
93 // Parentheses.
94 [TOK_LPAREN] = cstr("LPAREN"),
95 [TOK_RPAREN] = cstr("RPAREN"),
96 [TOK_LSQUARE] = cstr("LSQUARE"),
97 [TOK_RSQUARE] = cstr("RSQUARE"),
98 [TOK_LCURLY] = cstr("LCURLY"),
99 [TOK_RCURLY] = cstr("RCURLY"),
100
101 // Basic literals.
102 [TOK_NUMBER] = cstr("NUMBER"),
103 [TOK_SYMBOL] = cstr("SYMBOL"),
104 [TOK_STRING] = cstr("STRING"),
105 [TOK_NIL] = cstr("NIL"),
106 [TOK_TRUE] = cstr("TRUE"),
107 [TOK_FALSE] = cstr("FALSE"),
108
109 // Keywords.
110 [TOK_LET] = cstr("LET"),
111 [TOK_SET] = cstr("SET"),
112 [TOK_FUN] = cstr("FUN"),
113 [TOK_STRUCT] = cstr("STRUCT"),
114 [TOK_IF] = cstr("IF"),
115 [TOK_MATCH] = cstr("MATCH"),
116 [TOK_CASE] = cstr("CASE"),
117 [TOK_WHILE] = cstr("WHILE"),
118 [TOK_CONTINUE] = cstr("CONTINUE"),
119 [TOK_BREAK] = cstr("BREAK"),
120 [TOK_RETURN] = cstr("RETURN"),
121
122 // Arithmetic ops.
123 [TOK_ADD] = cstr("ADD"),
124 [TOK_SUB] = cstr("SUB"),
125 [TOK_MUL] = cstr("MUL"),
126 [TOK_DIV] = cstr("DIV"),
127 [TOK_MOD] = cstr("MOD"),
128
129 // Boolean ops.
130 [TOK_NOT] = cstr("NOT"),
131 [TOK_AND] = cstr("AND"),
132 [TOK_OR] = cstr("OR"),
133 [TOK_EQ] = cstr("EQ"),
134 [TOK_LT] = cstr("LT"),
135 [TOK_GT] = cstr("GT"),
136 [TOK_LE] = cstr("LE"),
137 [TOK_GE] = cstr("GE"),
138
139 // Bitwise ops.
140 [TOK_BITNOT] = cstr("BITNOT"),
141 [TOK_BITAND] = cstr("BITAND"),
142 [TOK_BITOR] = cstr("BITOR"),
143 [TOK_BITLSHIFT] = cstr("BITLSHIFT"),
144 [TOK_BITRSHIFT] = cstr("BITRSHIFT"),
145
146 // Special ops.
147 [TOK_COLON] = cstr("COLON"),
148 [TOK_DOT] = cstr("DOT"),
149 [TOK_AT] = cstr("AT"),
150 [TOK_ASSIGN] = cstr("ASSIGN"),
151
152 // End of file.
153 [TOK_EOF] = cstr("EOF"),
154};
155
156typedef struct Token {
157 TokenType type;
158 Str val;
159 sz line;
160 sz col;
161} Token;
162
163typedef struct Scanner {
164 Str str;
165 sz line;
166 sz col;
167 Arena *storage;
168} Scanner;
169
170char
171scan_next(Scanner *scanner) {
172 char c = str_next(&scanner->str);
173 if (c == '\n') {
174 scanner->line++;
175 scanner->col = 0;
176 } else {
177 scanner->col++;
178 }
179 return c;
180}
181
182bool
183scan_has_next(Scanner *scanner) {
184 return scanner->str.size;
185}
186
187char
188scan_peek(Scanner *scanner) {
189 return str_peek(scanner->str);
190}
191
192Token
193emit_token(Scanner *scanner, TokenType t) {
194 return (Token){
195 .line = scanner->line + 1,
196 .col = scanner->col + 1,
197 .type = t,
198 };
199}
200
201Token
202emit_token_err(Scanner *scanner, Str err_msg) {
203 return (Token){
204 .line = scanner->line + 1,
205 .col = scanner->col + 1,
206 .val = err_msg,
207 .type = TOK_UNKNOWN,
208 };
209}
210
211void
212scan_skip_line(Scanner *scanner) {
213 SearchResult newline = array_find_next(scanner->str, cstr("\n"));
214 if (newline.found) {
215 scanner->str.mem += newline.pos + 1;
216 scanner->str.size -= newline.pos + 1;
217 scanner->line++;
218 scanner->col = 0;
219 }
220}
221
222void
223scan_skip_whitespace(Scanner *scanner) {
224 while (scan_has_next(scanner)) {
225 char c = scan_peek(scanner);
226 switch (c) {
227 case ' ':
228 case ',': // We are currently considering commas as syntactic sugar.
229 case '\f':
230 case '\n':
231 case '\r':
232 case '\t':
233 case '\v': {
234 scan_next(scanner);
235 } break;
236 // Found a comment! (skip)
237 case ';': {
238 scan_skip_line(scanner);
239 } break;
240 default: {
241 return;
242 } break;
243 }
244 }
245}
246
247Token
248scan_token(Scanner *scanner) {
249 assert(scanner);
250
251 scan_skip_whitespace(scanner);
252 if (!scan_has_next(scanner)) {
253 return emit_token(scanner, TOK_EOF);
254 }
255
256 Scanner current = *scanner;
257 char c = scan_next(scanner);
258 (void)c;
259 // TODO: rest of the operations...
260
261 // At this point we have an error, find the next newline.
262 scan_skip_line(scanner);
263 return emit_token_err(&current, cstr("unexpected character"));
264}
265
24void 266void
25process_file(Str path) { 267process_file(Str path) {
26 (void)path;
27 // println("processing: ");
28 // println("%s", path);
29 // println("initialing memory...");
30 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); 268 Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
31 u8 *mem; 269
32 mem = arena_malloc(16, &lexer_arena); 270 FileContents file = platform_read_file(path, &lexer_arena);
33 println("%{Arena}", &lexer_arena); 271 if (file.err) {
34 mem[0] = 255; 272 printf("file.err: %d\n", file.err);
35 mem[1] = 255; 273 eprintln("%s: error: %s", path, cstr("WOT"));
36 mem[2] = 0xba; 274 return;
37 mem[3] = 0xdd; 275 }
38 mem[4] = 0x10; 276
39 mem[5] = 0xde; 277 Scanner scanner = {
40 mem[6] = 255; 278 .str = file.data,
41 mem[7] = 255; 279 .storage = &lexer_arena,
42 println("%{Arena}", &lexer_arena); 280 };
43 arena_malloc(8, &lexer_arena); 281 Token tok = {0};
44 mem = arena_realloc(mem, 16, 32, &lexer_arena); 282 while (tok.type != TOK_EOF) {
45 // arena_free(mem, 32, &lexer_arena); 283 tok = scan_token(&scanner);
46 println("%{Arena}", &lexer_arena); 284 println("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type],
47 // println("reading file..."); 285 tok.val);
48 // FileContents file = platform_read_file(path, &lexer_arena); 286 }
287 // while (true) {
288 // Token tok = scan_token(&scanner);
289 // println("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.type],
290 // tok.val);
291 // if (tok.type == TOK_EOF) break;
292 // }
49 293
50 // Str scanner = file.data; 294 // Str scanner = file.data;
51 // // NOTE: Testing file read line by line. 295 // // NOTE: Testing file read line by line.
@@ -57,10 +301,12 @@ process_file(Str path) {
57 // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345, 301 // println("<<< %x{4} %b{4} %f{2} %s %{Arena} >>>", 123, 3, 1.345,
58 // cstr("BOOM!"), &logger_inf.storage); 302 // cstr("BOOM!"), &logger_inf.storage);
59 303
60 println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size}); 304 // println("%{Mem}", &(Array){lexer_arena.beg, lexer_arena.size});
61 eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string logger"), 'X'); 305 // eprintln("%s:%d:%d: %s -> %c", path, 1, 1, cstr("error: testing string
62 // while (true) {} 306 // logger"), 'X'); while (true) {}
63 // TODO: run lexer. 307 // TODO: run lexer.
308 // Free up resources.
309 arena_destroy(&lexer_arena, os_allocator);
64} 310}
65 311
66#ifndef BIN_NAME 312#ifndef BIN_NAME