diff options
author | Bad Diode <bd@badd10de.dev> | 2021-10-29 15:37:28 +0200 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2021-10-29 15:37:28 +0200 |
commit | e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de (patch) | |
tree | c44721b005b7a0623e7acc7103ca8e21a25ff422 | |
parent | fcc131afdd029c606ea39f3557bc3d33a075b1de (diff) | |
download | bdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.tar.gz bdl-e73a4c16a2269cdb2f5e7d66fb9839e4c44e14de.zip |
Prepare third compiler implementation
-rwxr-xr-x | Makefile | 13 | ||||
-rw-r--r-- | src/common.h | 31 | ||||
-rwxr-xr-x | src/darray.h | 81 | ||||
-rwxr-xr-x | src/errors.c | 46 | ||||
-rwxr-xr-x | src/errors.h | 48 | ||||
-rwxr-xr-x | src/lexer.c | 244 | ||||
-rwxr-xr-x | src/lexer.h | 67 | ||||
-rwxr-xr-x | src/main.c | 137 | ||||
-rwxr-xr-x | src/string_view.c | 40 | ||||
-rwxr-xr-x | src/string_view.h | 25 |
10 files changed, 724 insertions, 8 deletions
@@ -2,7 +2,7 @@ | |||
2 | .SUFFIXES: | 2 | .SUFFIXES: |
3 | 3 | ||
4 | # Source code location and files to watch for changes. | 4 | # Source code location and files to watch for changes. |
5 | SRC_DIR := src/bytecode | 5 | SRC_DIR := src |
6 | BUILD_DIR := build | 6 | BUILD_DIR := build |
7 | SRC_MAIN := $(SRC_DIR)/main.c | 7 | SRC_MAIN := $(SRC_DIR)/main.c |
8 | WATCH_SRC := $(shell find $(SRC_DIR) -name "*.c" -or -name "*.s" -or -name "*.h") | 8 | WATCH_SRC := $(shell find $(SRC_DIR) -name "*.c" -or -name "*.s" -or -name "*.h") |
@@ -22,7 +22,7 @@ LDLIBS := | |||
22 | RELEASE_CFLAGS := -DNDEBUG -O2 -static | 22 | RELEASE_CFLAGS := -DNDEBUG -O2 -static |
23 | DEBUG_CFLAGS := -DDEBUG -O0 -g | 23 | DEBUG_CFLAGS := -DDEBUG -O0 -g |
24 | 24 | ||
25 | .PHONY: build tests run clean | 25 | .PHONY: build tests clean |
26 | 26 | ||
27 | # Setup debug/release builds. | 27 | # Setup debug/release builds. |
28 | # make clean && make <target> DEBUG=0 | 28 | # make clean && make <target> DEBUG=0 |
@@ -45,15 +45,12 @@ $(BIN): $(SRC_MAIN) $(WATCH_SRC) $(BUILD_DIR) | |||
45 | $(BUILD_DIR): | 45 | $(BUILD_DIR): |
46 | mkdir -p $(BUILD_DIR) | 46 | mkdir -p $(BUILD_DIR) |
47 | 47 | ||
48 | run: $(BIN) | ||
49 | ./$(BIN) -i | ||
50 | |||
51 | tests: $(BIN) | 48 | tests: $(BIN) |
52 | ./$(BIN) examples/arithmetic.bdl | diff tests/arithmetic_expected.txt - | 49 | # ./$(BIN) examples/arithmetic.bdl | diff tests/arithmetic_expected.txt - |
53 | ./$(BIN) examples/booleans.bdl | diff tests/booleans_expected.txt - | 50 | # ./$(BIN) examples/booleans.bdl | diff tests/booleans_expected.txt - |
54 | # ./$(BIN) examples/lists.bdl | diff tests/lists_expected.txt - | 51 | # ./$(BIN) examples/lists.bdl | diff tests/lists_expected.txt - |
55 | # ./$(BIN) examples/types.bdl | diff tests/types_expected.txt - | 52 | # ./$(BIN) examples/types.bdl | diff tests/types_expected.txt - |
56 | ./$(BIN) examples/variables.bdl | diff tests/variables_expected.txt - | 53 | # ./$(BIN) examples/variables.bdl | diff tests/variables_expected.txt - |
57 | 54 | ||
58 | # Remove build directory. | 55 | # Remove build directory. |
59 | clean: | 56 | clean: |
diff --git a/src/common.h b/src/common.h new file mode 100644 index 0000000..08e78a8 --- /dev/null +++ b/src/common.h | |||
@@ -0,0 +1,31 @@ | |||
#ifndef BDL_COMMON_H
#define BDL_COMMON_H

// Project-wide basics: the standard fixed-width integer types under short
// aliases, plus helpers for writing byte counts in binary units.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Unsigned fixed-width integers.
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

// Signed fixed-width integers.
typedef int8_t  s8;
typedef int16_t s16;
typedef int32_t s32;
typedef int64_t s64;

// Volatile-qualified counterparts of the aliases above.
typedef volatile u8  vu8;
typedef volatile u16 vu16;
typedef volatile u32 vu32;
typedef volatile u64 vu64;
typedef volatile s8  vs8;
typedef volatile s16 vs16;
typedef volatile s32 vs32;
typedef volatile s64 vs64;

// Byte-count helpers. Each unit is 1024 times the previous one and every
// result has type u64.
#define KB(N) ((u64)(N) * 1024)
#define MB(N) ((u64)KB(N) * 1024)
#define GB(N) ((u64)MB(N) * 1024)
#define TB(N) ((u64)GB(N) * 1024)

#endif // BDL_COMMON_H
diff --git a/src/darray.h b/src/darray.h new file mode 100755 index 0000000..fa4e293 --- /dev/null +++ b/src/darray.h | |||
@@ -0,0 +1,81 @@ | |||
#ifndef BDL_DARRAY_H
#define BDL_DARRAY_H

// Minimal type-generic dynamic array. The user holds a plain `T *` pointing at
// the first element; an ArrayHeader with the bookkeeping lives immediately
// before that address. A NULL pointer is treated as an empty array.
//
// Growth failures (out of memory or size overflow) inside array_push and
// array_insert abort the program, because their result is written through
// immediately and cannot be checked by the caller.

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct ArrayHeader {
    size_t size; // Number of elements currently stored.
    size_t cap;  // Number of elements the allocation can hold.
} ArrayHeader;

// Header/Size/capacity accessors.
#define array_head(ARR) ((ArrayHeader *)((char *)(ARR) - sizeof(ArrayHeader)))
#define array_size(ARR) ((ARR) ? array_head(ARR)->size : 0)
#define array_cap(ARR) ((ARR) ? array_head(ARR)->cap : 0)

// Initialize a dynamic array ARR with room for N elements. The initialization
// doesn't zero out the data, so tread carefully. ARR is set to NULL if the
// allocation fails.
#define array_init(ARR,N) ((ARR) = _array_reserve(N, sizeof(*(ARR))))

// Push a given element T to the dynamic array ARR. Growth is sized from the
// array's element type, not from the type of the pushed expression.
#define array_push(ARR, T) \
    ((ARR) = _array_maybe_grow(ARR, sizeof(*(ARR))), \
     (ARR)[array_head(ARR)->size++] = (T))

// Remove and return the last element of the array. Can be used to build
// stacks. The array must not be empty.
#define array_pop(ARR) ((ARR)[--array_head(ARR)->size])

// Return the value stored at the OFFSET position from the tail of the array.
#define array_peek(ARR, OFFSET) ((ARR)[array_head(ARR)->size - 1 - (OFFSET)])

// Append N bytes from the SRC buffer to the ARR dynamic array.
#define array_insert(ARR, SRC, N) \
    ((ARR) = (void *)_array_insert((char *)(ARR), (const char *)(SRC), \
                                   N, sizeof(*(ARR))))

// Free the memory from the original allocated position and reset ARR to NULL.
#define array_free(ARR) \
    ((ARR) ? (free(array_head(ARR)), (ARR) = NULL) : NULL)

// Compute the number of bytes needed for a header plus `cap` elements of
// `type_size` bytes. Returns 0 (leaving *out untouched) if that would overflow.
static inline int
_array_alloc_size(size_t cap, size_t type_size, size_t *out) {
    if (type_size != 0 && cap > (SIZE_MAX - sizeof(ArrayHeader)) / type_size) {
        return 0;
    }
    *out = sizeof(ArrayHeader) + cap * type_size;
    return 1;
}

// Allocate an empty array with capacity for `num_elem` elements of
// `type_size` bytes. Returns a pointer to the element storage, or NULL if the
// size overflows or the allocation fails.
static inline void *
_array_reserve(size_t num_elem, size_t type_size) {
    size_t n_bytes = 0;
    if (!_array_alloc_size(num_elem, type_size, &n_bytes)) {
        return NULL;
    }
    ArrayHeader *head = malloc(n_bytes);
    if (head == NULL) {
        return NULL;
    }
    head->size = 0;
    head->cap = num_elem;
    return head + 1;
}

// Make sure there is room for one more element, doubling the capacity when
// the array is full. Returns the (possibly moved) element pointer. A NULL
// `arr` is promoted to a fresh one-element array. Aborts if memory can't be
// obtained.
static inline void *
_array_maybe_grow(void *arr, size_t type_size) {
    if (arr == NULL) {
        arr = _array_reserve(1, type_size);
        if (arr == NULL) {
            abort();
        }
        return arr;
    }
    ArrayHeader *head = array_head(arr);
    if (head->size < head->cap) {
        return arr;
    }
    if (head->cap > SIZE_MAX / 2) {
        abort();
    }
    size_t new_cap = head->cap == 0 ? 1 : head->cap * 2;
    size_t n_bytes = 0;
    if (!_array_alloc_size(new_cap, type_size, &n_bytes)) {
        abort();
    }
    // Keep the old pointer until realloc is known to have succeeded.
    ArrayHeader *grown = realloc(head, n_bytes);
    if (grown == NULL) {
        abort();
    }
    grown->cap = new_cap;
    return grown + 1;
}

// Append `n_bytes` bytes from `src` after the last stored element and return
// the (possibly moved) element pointer. `size` and `cap` stay expressed in
// elements of `type_size` bytes: the size grows by n_bytes / type_size,
// rounded up, so a trailing partial element is left partly uninitialized.
// For byte arrays (type_size == 1) this is exactly "append n_bytes bytes".
// A NULL `arr` is treated as an empty array. Aborts if memory can't be
// obtained.
static inline
char * _array_insert(char *arr, const char *src, size_t n_bytes, size_t type_size) {
    if (type_size == 0) {
        return arr;
    }
    if (arr == NULL) {
        arr = _array_reserve(0, type_size);
        if (arr == NULL) {
            abort();
        }
    }
    ArrayHeader *head = array_head(arr);
    size_t n_elem = n_bytes / type_size + (n_bytes % type_size != 0 ? 1 : 0);
    if (n_elem > SIZE_MAX - head->size) {
        abort();
    }
    size_t new_size = head->size + n_elem;
    if (new_size > head->cap) {
        size_t new_cap = head->cap == 0 ? 1 : head->cap;
        while (new_cap < new_size) {
            if (new_cap > SIZE_MAX / 2) {
                new_cap = new_size;
                break;
            }
            new_cap *= 2;
        }
        size_t total = 0;
        if (!_array_alloc_size(new_cap, type_size, &total)) {
            abort();
        }
        ArrayHeader *grown = realloc(head, total);
        if (grown == NULL) {
            abort();
        }
        grown->cap = new_cap;
        head = grown;
        arr = (char *)(head + 1);
    }
    if (n_bytes != 0) {
        // The write position is a byte offset, hence the scaling by type_size.
        memcpy(arr + head->size * type_size, src, n_bytes);
    }
    head->size = new_size;
    return arr;
}

#endif // BDL_DARRAY_H
diff --git a/src/errors.c b/src/errors.c new file mode 100755 index 0000000..11348fd --- /dev/null +++ b/src/errors.c | |||
@@ -0,0 +1,46 @@ | |||
1 | #include "errors.h" | ||
2 | |||
3 | static const char* error_msgs[] = { | ||
4 | [ERR_UNKNOWN] = "error: something unexpected happened", | ||
5 | [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter", | ||
6 | [ERR_UNBALANCED_PAREN] = "error: unbalanced parentheses", | ||
7 | [ERR_NOT_IMPLEMENTED] = "error: not implemented", | ||
8 | [ERR_EOF_REACHED] = "error: EOF reached", | ||
9 | [ERR_UNKNOWN_TOKEN] = "error: unknown token", | ||
10 | [ERR_UNKNOWN_OBJ_TYPE] = "error: can't eval unknown object type", | ||
11 | [ERR_NOT_A_SYMBOL] = "error: object is not a symbol", | ||
12 | [ERR_SYMBOL_NOT_FOUND] = "error: symbol not found", | ||
13 | [ERR_NOT_CALLABLE] = "error: not callable", | ||
14 | [ERR_NOT_ENOUGH_ARGS] = "error: not enough arguments", | ||
15 | [ERR_TOO_MANY_ARGS] = "error: too many arguments", | ||
16 | [ERR_WRONG_ARG_TYPE] = "error: wrong argument type", | ||
17 | [ERR_DIVISION_BY_ZERO] = "error: division by zero", | ||
18 | [ERR_AMBIGUOUS_PARAMS] = "error: ambiguous parameter names", | ||
19 | }; | ||
20 | |||
21 | void | ||
22 | error_push(Errors *errors, Error error) { | ||
23 | if (errors->n < ERR_MAX_NUMBER) { | ||
24 | errors->errors[errors->n++] = error; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | void | ||
29 | report_errors(Errors *errors, const char *file_name) { | ||
30 | for (size_t i = 0; i < errors->n; i++) { | ||
31 | Error err = errors->errors[i]; | ||
32 | fprintf(stderr, "%s", file_name); | ||
33 | if (err.line != 0) { | ||
34 | fprintf(stderr, ":%ld:%ld", err.line, err.col); | ||
35 | } | ||
36 | switch (err.type) { | ||
37 | case ERR_TYPE_LEXER: { fprintf(stderr, ": [lexer]"); } break; | ||
38 | case ERR_TYPE_COMPILER: { fprintf(stderr, ": [compiler]"); } break; | ||
39 | case ERR_TYPE_RUNTIME: { fprintf(stderr, ": [runtime]"); } break; | ||
40 | case ERR_TYPE_PARSER: { fprintf(stderr, ": [parser]"); } break; | ||
41 | default: break; | ||
42 | } | ||
43 | fprintf(stderr, " %s\n", error_msgs[err.value]); | ||
44 | } | ||
45 | errors->n = 0; | ||
46 | } | ||
diff --git a/src/errors.h b/src/errors.h new file mode 100755 index 0000000..7d8e977 --- /dev/null +++ b/src/errors.h | |||
@@ -0,0 +1,48 @@ | |||
1 | #ifndef BDL_ERRORS_H | ||
2 | #define BDL_ERRORS_H | ||
3 | |||
4 | #include "common.h" | ||
5 | |||
6 | typedef enum ErrorType { | ||
7 | ERR_TYPE_LEXER, | ||
8 | ERR_TYPE_PARSER, | ||
9 | ERR_TYPE_COMPILER, | ||
10 | ERR_TYPE_RUNTIME, | ||
11 | } ErrorType; | ||
12 | |||
13 | typedef enum ErrorValue { | ||
14 | ERR_UNKNOWN = 0, | ||
15 | ERR_UNMATCHED_STRING, | ||
16 | ERR_UNBALANCED_PAREN, | ||
17 | ERR_NOT_IMPLEMENTED, | ||
18 | ERR_EOF_REACHED, | ||
19 | ERR_UNKNOWN_TOKEN, | ||
20 | ERR_UNKNOWN_OBJ_TYPE, | ||
21 | ERR_NOT_A_SYMBOL, | ||
22 | ERR_SYMBOL_NOT_FOUND, | ||
23 | ERR_NOT_CALLABLE, | ||
24 | ERR_NOT_ENOUGH_ARGS, | ||
25 | ERR_TOO_MANY_ARGS, | ||
26 | ERR_WRONG_ARG_TYPE, | ||
27 | ERR_DIVISION_BY_ZERO, | ||
28 | ERR_AMBIGUOUS_PARAMS, | ||
29 | } ErrorValue; | ||
30 | |||
31 | typedef struct Error { | ||
32 | ErrorType type; | ||
33 | ErrorValue value; | ||
34 | size_t line; | ||
35 | size_t col; | ||
36 | } Error; | ||
37 | |||
38 | #define ERR_MAX_NUMBER 16 | ||
39 | |||
40 | typedef struct Errors { | ||
41 | Error errors[ERR_MAX_NUMBER]; | ||
42 | size_t n; | ||
43 | } Errors; | ||
44 | |||
45 | void error_push(Errors *errors, Error error); | ||
46 | void report_errors(Errors *errors, const char *file_name); | ||
47 | |||
48 | #endif // BDL_ERRORS_H | ||
diff --git a/src/lexer.c b/src/lexer.c new file mode 100755 index 0000000..6a417e4 --- /dev/null +++ b/src/lexer.c | |||
@@ -0,0 +1,244 @@ | |||
1 | #include "lexer.h" | ||
2 | #include "errors.h" | ||
3 | |||
4 | static const char* token_str[] = { | ||
5 | [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN", | ||
6 | [TOKEN_LPAREN] = "TOKEN_LPAREN", | ||
7 | [TOKEN_RPAREN] = "TOKEN_RPAREN", | ||
8 | [TOKEN_FIXNUM] = "TOKEN_FIXNUM", | ||
9 | [TOKEN_SYMBOL] = "TOKEN_SYMBOL", | ||
10 | [TOKEN_STRING] = "TOKEN_STRING", | ||
11 | [TOKEN_NIL] = "TOKEN_NIL", | ||
12 | [TOKEN_TRUE] = "TOKEN_TRUE", | ||
13 | [TOKEN_FALSE] = "TOKEN_FALSE", | ||
14 | [TOKEN_EOF] = "TOKEN_EOF", | ||
15 | }; | ||
16 | |||
17 | void | ||
18 | print_token(Token tok) { | ||
19 | printf("[%4ld:%-4ld] ", tok.line, tok.column); | ||
20 | printf("%s", token_str[tok.type]); | ||
21 | switch (tok.type) { | ||
22 | case TOKEN_FIXNUM: { | ||
23 | printf(" -> "); | ||
24 | sv_write(&tok.value); | ||
25 | } break; | ||
26 | case TOKEN_SYMBOL: { | ||
27 | printf(" -> "); | ||
28 | sv_write(&tok.value); | ||
29 | } break; | ||
30 | case TOKEN_STRING: { | ||
31 | printf(" -> "); | ||
32 | sv_write(&tok.value); | ||
33 | } break; | ||
34 | default: { | ||
35 | } break; | ||
36 | } | ||
37 | printf("\n"); | ||
38 | } | ||
39 | |||
40 | char | ||
41 | scan_next(Scanner *scanner) { | ||
42 | char c = sv_next(&scanner->current); | ||
43 | if (c == '\n') { | ||
44 | scanner->line_number++; | ||
45 | scanner->col_number = 1; | ||
46 | } else { | ||
47 | scanner->col_number++; | ||
48 | } | ||
49 | scanner->offset++; | ||
50 | return c; | ||
51 | } | ||
52 | |||
53 | char | ||
54 | scan_peek(const Scanner *scanner) { | ||
55 | return sv_peek(&scanner->current); | ||
56 | } | ||
57 | |||
58 | bool | ||
59 | scan_has_next(const Scanner *scanner) { | ||
60 | return scanner->current.n != 0; | ||
61 | } | ||
62 | |||
63 | void | ||
64 | skip_whitespace(Scanner *scanner) { | ||
65 | while (scan_has_next(scanner)) { | ||
66 | char c = scan_peek(scanner); | ||
67 | switch (c) { | ||
68 | case ' ': | ||
69 | case '\f': | ||
70 | case '\n': | ||
71 | case '\r': | ||
72 | case '\t': | ||
73 | case '\v': { | ||
74 | scan_next(scanner); | ||
75 | } break; | ||
76 | default: { | ||
77 | return; | ||
78 | } break; | ||
79 | } | ||
80 | } | ||
81 | } | ||
82 | |||
// Report whether `c` ends a symbol or number lexeme: NUL (which the scanner
// also returns at end of input), the comment marker ';', quotes, parentheses
// and whitespace.
//
// EOF is deliberately not listed. The scanner never produces it, and
// comparing a `char` against EOF (-1) would classify the byte 0xFF as a
// delimiter only on platforms where `char` is signed.
bool
is_delimiter(char c) {
    switch (c) {
        case '\0':
        case ';':
        case '"':
        case '\'':
        case '(':
        case ')':
        case ' ':
        case '\f':
        case '\n':
        case '\r':
        case '\t':
        case '\v': {
            return true;
        } break;
        default: {
            return false;
        } break;
    }
}
104 | |||
105 | #define TOKEN_IS_KEYWORD(VAL, KEYWORD) \ | ||
106 | sv_equal(&(VAL), &(StringView){(KEYWORD), sizeof(KEYWORD) - 1}) | ||
107 | |||
108 | TokenType | ||
109 | find_primitive_type(const StringView value) { | ||
110 | bool is_fixnum = true; | ||
111 | for (size_t i = 0; i < value.n; i++) { | ||
112 | char c = value.start[i]; | ||
113 | if (i == 0 && c == '-' && value.n > 1) { | ||
114 | continue; | ||
115 | } | ||
116 | if (!(c >= '0' && c <= '9')) { | ||
117 | is_fixnum = false; | ||
118 | break; | ||
119 | } | ||
120 | } | ||
121 | if (is_fixnum) { | ||
122 | return TOKEN_FIXNUM; | ||
123 | } | ||
124 | if (TOKEN_IS_KEYWORD(value, "nil")) { return TOKEN_NIL; } | ||
125 | if (TOKEN_IS_KEYWORD(value, "true")) { return TOKEN_TRUE; } | ||
126 | if (TOKEN_IS_KEYWORD(value, "false")) { return TOKEN_FALSE; } | ||
127 | |||
128 | return TOKEN_SYMBOL; | ||
129 | } | ||
130 | |||
131 | Tokens | ||
132 | tokenize(const StringView *sv) { | ||
133 | Tokens tokens = {0}; | ||
134 | tokens.tokens = NULL; | ||
135 | array_init(tokens.tokens, 1); | ||
136 | Scanner scanner = (Scanner){ | ||
137 | .current = *sv, | ||
138 | .line_number = 1, | ||
139 | .col_number = 1, | ||
140 | }; | ||
141 | |||
142 | while (scan_has_next(&scanner)) { | ||
143 | skip_whitespace(&scanner); | ||
144 | size_t line = scanner.line_number; | ||
145 | size_t col = scanner.col_number; | ||
146 | size_t offset = scanner.offset; | ||
147 | char c = scan_next(&scanner); | ||
148 | switch (c) { | ||
149 | case ';': { | ||
150 | while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} | ||
151 | } break; | ||
152 | case '"': { | ||
153 | char prev = c; | ||
154 | bool found = false; | ||
155 | size_t n = 0; | ||
156 | while (scan_has_next(&scanner)) { | ||
157 | c = scan_next(&scanner); | ||
158 | if (c == '"' && prev != '\\') { | ||
159 | found = true; | ||
160 | break; | ||
161 | } | ||
162 | prev = c; | ||
163 | n++; | ||
164 | } | ||
165 | if (!found) { | ||
166 | error_push(&tokens.errors, (Error){ | ||
167 | .type = ERR_TYPE_LEXER, | ||
168 | .value = ERR_UNMATCHED_STRING, | ||
169 | .line = line, | ||
170 | .col = col, | ||
171 | }); | ||
172 | return tokens; | ||
173 | } | ||
174 | Token token = (Token){ | ||
175 | .value = (StringView){ | ||
176 | .start = &sv->start[offset + 1], | ||
177 | .n = n, | ||
178 | }, | ||
179 | .type = TOKEN_STRING, | ||
180 | .line = line, | ||
181 | .column = col, | ||
182 | }; | ||
183 | array_push(tokens.tokens, token); | ||
184 | } break; | ||
185 | case '(': { | ||
186 | if (scan_peek(&scanner) == ')') { | ||
187 | scan_next(&scanner); | ||
188 | Token token = (Token){ | ||
189 | .type = TOKEN_NIL, | ||
190 | .line = line, | ||
191 | .column = col, | ||
192 | }; | ||
193 | array_push(tokens.tokens, token); | ||
194 | } else { | ||
195 | Token token = (Token){ | ||
196 | .type = TOKEN_LPAREN, | ||
197 | .line = line, | ||
198 | .column = col, | ||
199 | }; | ||
200 | array_push(tokens.tokens, token); | ||
201 | } | ||
202 | } break; | ||
203 | case ')': { | ||
204 | Token token = (Token){ | ||
205 | .type = TOKEN_RPAREN, | ||
206 | .line = line, | ||
207 | .column = col, | ||
208 | }; | ||
209 | array_push(tokens.tokens, token); | ||
210 | } break; | ||
211 | default: { | ||
212 | size_t n = 1; | ||
213 | while (!is_delimiter(scan_peek(&scanner))) { | ||
214 | scan_next(&scanner); | ||
215 | n++; | ||
216 | } | ||
217 | if (c == EOF || c == '\0') { | ||
218 | break; | ||
219 | } | ||
220 | Token token = (Token){ | ||
221 | .value = (StringView){ | ||
222 | .start = &sv->start[offset], | ||
223 | .n = n, | ||
224 | }, | ||
225 | .type = TOKEN_SYMBOL, | ||
226 | .line = line, | ||
227 | .column = col, | ||
228 | }; | ||
229 | token.type = find_primitive_type(token.value); | ||
230 | array_push(tokens.tokens, token); | ||
231 | } break; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | // Push EOF token. | ||
236 | Token token = (Token){ | ||
237 | .type = TOKEN_EOF, | ||
238 | .line = scanner.line_number, | ||
239 | .column = 1, | ||
240 | }; | ||
241 | array_push(tokens.tokens, token); | ||
242 | |||
243 | return tokens; | ||
244 | } | ||
diff --git a/src/lexer.h b/src/lexer.h new file mode 100755 index 0000000..e56f5f2 --- /dev/null +++ b/src/lexer.h | |||
@@ -0,0 +1,67 @@ | |||
1 | #ifndef BDL_LEXER_H | ||
2 | #define BDL_LEXER_H | ||
3 | |||
4 | #include "string_view.h" | ||
5 | |||
6 | typedef enum TokenType { | ||
7 | TOKEN_UNKNOWN = 0, | ||
8 | |||
9 | // Parentheses. | ||
10 | TOKEN_LPAREN, | ||
11 | TOKEN_RPAREN, | ||
12 | |||
13 | // Primitive types. | ||
14 | TOKEN_FIXNUM, | ||
15 | TOKEN_SYMBOL, | ||
16 | TOKEN_STRING, | ||
17 | TOKEN_NIL, | ||
18 | TOKEN_TRUE, | ||
19 | TOKEN_FALSE, | ||
20 | |||
21 | // End of file. | ||
22 | TOKEN_EOF, | ||
23 | } TokenType; | ||
24 | |||
25 | typedef struct Token { | ||
26 | TokenType type; | ||
27 | StringView value; | ||
28 | size_t line; | ||
29 | size_t column; | ||
30 | } Token; | ||
31 | |||
32 | typedef struct Tokens { | ||
33 | Token *tokens; | ||
34 | Errors errors; | ||
35 | } Tokens; | ||
36 | |||
37 | typedef struct Scanner { | ||
38 | StringView current; | ||
39 | size_t line_number; | ||
40 | size_t col_number; | ||
41 | size_t offset; | ||
42 | } Scanner; | ||
43 | |||
44 | // Print a token to standard output for debugging purposes. | ||
45 | void print_token(Token tok); | ||
46 | |||
47 | // Same functionality as the ScanView pairs, but keeping track of line and | ||
48 | // column numbers. | ||
49 | char scan_next(Scanner *scanner); | ||
50 | char scan_peek(const Scanner *scanner); | ||
51 | |||
52 | // Check if the current scanner still have characters left. | ||
53 | bool scan_has_next(const Scanner *scanner); | ||
54 | |||
55 | // Advance the scanner until we ran out of whitespace. | ||
56 | void skip_whitespace(Scanner *scanner); | ||
57 | |||
58 | // Check if a given character is a delimiter. | ||
59 | bool is_delimiter(char c); | ||
60 | |||
61 | // Extract the token type from the current string. | ||
62 | TokenType find_primitive_type(const StringView value); | ||
63 | |||
64 | // Generate a list of tokens from the given string. | ||
65 | Tokens tokenize(const StringView *sv); | ||
66 | |||
67 | #endif // BDL_LEXER_H | ||
diff --git a/src/main.c b/src/main.c new file mode 100755 index 0000000..90860e8 --- /dev/null +++ b/src/main.c | |||
@@ -0,0 +1,137 @@ | |||
1 | #include <getopt.h> | ||
2 | #include <stdio.h> | ||
3 | #include <stdlib.h> | ||
4 | |||
5 | #include "common.h" | ||
6 | #include "darray.h" | ||
7 | #include "string_view.c" | ||
8 | #include "errors.c" | ||
9 | #include "lexer.c" | ||
10 | |||
// Start-up hook, called once at the beginning of main. Currently a stub that
// does nothing.
void
init(void) {
}
15 | |||
// Shutdown hook, called once before main returns successfully. Currently a
// stub that does nothing.
void
halt(void) {
}
20 | |||
21 | void | ||
22 | process_source(const StringView *source, const char *file_name) { | ||
23 | // Read tokens. | ||
24 | Tokens tokens = tokenize(source); | ||
25 | if (tokens.errors.n != 0) { | ||
26 | report_errors(&tokens.errors, file_name); | ||
27 | exit(EXIT_FAILURE); | ||
28 | } | ||
29 | |||
30 | // TODO: Parser. | ||
31 | // TODO: Semantic analysis. | ||
32 | // TODO: Optimization. | ||
33 | // TODO: Compilation. | ||
34 | |||
35 | // Free resources. | ||
36 | array_free(tokens.tokens); | ||
37 | } | ||
38 | |||
39 | void | ||
40 | run_file(char *file_name) { | ||
41 | FILE *file = fopen(file_name, "r"); | ||
42 | if (!file) { | ||
43 | fprintf(stderr, "error: couldn't open input file: %s\n", file_name); | ||
44 | exit(EXIT_FAILURE); | ||
45 | } | ||
46 | |||
47 | // Read entire file into memory. | ||
48 | fseek(file, 0, SEEK_END); | ||
49 | size_t file_size = ftell(file); | ||
50 | fseek(file, 0, SEEK_SET); | ||
51 | |||
52 | char *source = malloc(file_size + 1); | ||
53 | fread(source, 1, file_size, file); | ||
54 | source[file_size] = 0; | ||
55 | |||
56 | StringView sv = (StringView){ | ||
57 | .start = source, | ||
58 | .n = file_size, | ||
59 | }; | ||
60 | |||
61 | process_source(&sv, file_name); | ||
62 | |||
63 | free(source); | ||
64 | fclose(file); | ||
65 | } | ||
66 | |||
67 | #define STDIN_BUF_CAP 16 | ||
68 | |||
69 | void | ||
70 | run_stdin(void) { | ||
71 | size_t buf_size = 0; | ||
72 | char *source = NULL; | ||
73 | array_init(source, STDIN_BUF_CAP); | ||
74 | |||
75 | char c; | ||
76 | while ((c = getchar()) != EOF) { | ||
77 | array_push(source, c); | ||
78 | buf_size++; | ||
79 | } | ||
80 | |||
81 | StringView sv = (StringView){ | ||
82 | .start = source, | ||
83 | .n = buf_size, | ||
84 | }; | ||
85 | |||
86 | process_source(&sv, "stdin"); | ||
87 | |||
88 | array_free(source); | ||
89 | } | ||
90 | |||
// Program name shown in the usage text. Can be overridden at build time.
#ifndef BIN_NAME
#define BIN_NAME "bdl"
#endif

// Print the command-line help to standard output.
void
print_usage(void) {
    printf("Usage: %s [options] <filename filename ...>\n"
           "\n"
           "\t-h \tShow usage.\n"
           "\n",
           BIN_NAME);
}
102 | |||
103 | int | ||
104 | main(int argc, char *argv[]) { | ||
105 | init(); | ||
106 | |||
107 | int option; | ||
108 | while ((option = getopt(argc, argv, "h")) != -1) { | ||
109 | switch (option) { | ||
110 | case 'h': { | ||
111 | print_usage(); | ||
112 | goto exit_success; | ||
113 | } break; | ||
114 | default: { | ||
115 | print_usage(); | ||
116 | return EXIT_FAILURE; | ||
117 | } break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | // Run from stdin. | ||
122 | if (optind == argc) { | ||
123 | run_stdin(); | ||
124 | goto exit_success; | ||
125 | } | ||
126 | |||
127 | // Run from file. | ||
128 | while (optind < argc) { | ||
129 | char *file_name = argv[optind]; | ||
130 | run_file(file_name); | ||
131 | optind++; | ||
132 | } | ||
133 | |||
134 | exit_success: | ||
135 | halt(); | ||
136 | return EXIT_SUCCESS; | ||
137 | } | ||
diff --git a/src/string_view.c b/src/string_view.c new file mode 100755 index 0000000..8247bd4 --- /dev/null +++ b/src/string_view.c | |||
@@ -0,0 +1,40 @@ | |||
1 | #include "string_view.h" | ||
2 | |||
3 | char | ||
4 | sv_next(StringView *sv) { | ||
5 | if (sv->n == 0) { | ||
6 | return '\0'; | ||
7 | } | ||
8 | char c = sv->start[0]; | ||
9 | sv->start++; | ||
10 | sv->n--; | ||
11 | return c; | ||
12 | } | ||
13 | |||
14 | char | ||
15 | sv_peek(const StringView *sv) { | ||
16 | if (sv->n == 0) { | ||
17 | return '\0'; | ||
18 | } | ||
19 | return sv->start[0]; | ||
20 | } | ||
21 | |||
22 | bool | ||
23 | sv_equal(const StringView *a, const StringView *b) { | ||
24 | if (a->n != b->n) { | ||
25 | return false; | ||
26 | } | ||
27 | for (size_t i = 0; i < a->n; i++) { | ||
28 | if (a->start[i] != b->start[i]) { | ||
29 | return false; | ||
30 | } | ||
31 | } | ||
32 | return true; | ||
33 | } | ||
34 | |||
35 | void | ||
36 | sv_write(const StringView *sv) { | ||
37 | for (size_t i = 0; i < sv->n; i++) { | ||
38 | putchar(sv->start[i]); | ||
39 | } | ||
40 | } | ||
diff --git a/src/string_view.h b/src/string_view.h new file mode 100755 index 0000000..4dbbaaf --- /dev/null +++ b/src/string_view.h | |||
@@ -0,0 +1,25 @@ | |||
1 | #ifndef BDL_STRINGVIEW_H | ||
2 | #define BDL_STRINGVIEW_H | ||
3 | |||
4 | #include "common.h" | ||
5 | |||
6 | typedef struct StringView { | ||
7 | char *start; | ||
8 | size_t n; | ||
9 | } StringView; | ||
10 | |||
11 | // Consume a character in the stream. | ||
12 | char sv_next(StringView *sv); | ||
13 | |||
14 | // Check what is the current character in the stream. | ||
15 | char sv_peek(const StringView *sv); | ||
16 | |||
17 | // Compare if the arguments are the same. | ||
18 | bool sv_equal(const StringView *a, const StringView *b); | ||
19 | |||
20 | // Write a character to the given output stream. | ||
21 | void sv_write(const StringView *sv); | ||
22 | |||
23 | #define STRING(STR) (StringView){(STR), sizeof(STR) - 1} | ||
24 | |||
25 | #endif // BDL_STRINGVIEW_H | ||