aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2021-10-29 19:11:40 +0200
committerBad Diode <bd@badd10de.dev>2021-10-29 19:11:40 +0200
commit5ed73b695e6b463149ab0c9ae3eccb26a4ec5807 (patch)
tree01aa089934d1b49fe515fe86fffca01c471c69e9
parente73a4c16a2269cdb2f5e7d66fb9839e4c44e14de (diff)
downloadbdl-5ed73b695e6b463149ab0c9ae3eccb26a4ec5807.tar.gz
bdl-5ed73b695e6b463149ab0c9ae3eccb26a4ec5807.zip
Add parser for tokens->ast conversion
-rwxr-xr-xsrc/lexer.c37
-rwxr-xr-xsrc/lexer.h9
-rwxr-xr-xsrc/main.c23
-rw-r--r--src/parser.c257
-rwxr-xr-xsrc/parser.h84
5 files changed, 379 insertions, 31 deletions
diff --git a/src/lexer.c b/src/lexer.c
index 6a417e4..ac93a5c 100755
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -16,7 +16,7 @@ static const char* token_str[] = {
16 16
17void 17void
18print_token(Token tok) { 18print_token(Token tok) {
19 printf("[%4ld:%-4ld] ", tok.line, tok.column); 19 printf("[%4ld:%-4ld] ", tok.line, tok.col);
20 printf("%s", token_str[tok.type]); 20 printf("%s", token_str[tok.type]);
21 switch (tok.type) { 21 switch (tok.type) {
22 case TOKEN_FIXNUM: { 22 case TOKEN_FIXNUM: {
@@ -128,11 +128,10 @@ find_primitive_type(const StringView value) {
128 return TOKEN_SYMBOL; 128 return TOKEN_SYMBOL;
129} 129}
130 130
131Tokens 131Token *
132tokenize(const StringView *sv) { 132tokenize(const StringView *sv, Errors *errors) {
133 Tokens tokens = {0}; 133 Token *tokens = NULL;
134 tokens.tokens = NULL; 134 array_init(tokens, 1);
135 array_init(tokens.tokens, 1);
136 Scanner scanner = (Scanner){ 135 Scanner scanner = (Scanner){
137 .current = *sv, 136 .current = *sv,
138 .line_number = 1, 137 .line_number = 1,
@@ -163,7 +162,7 @@ tokenize(const StringView *sv) {
163 n++; 162 n++;
164 } 163 }
165 if (!found) { 164 if (!found) {
166 error_push(&tokens.errors, (Error){ 165 error_push(errors, (Error){
167 .type = ERR_TYPE_LEXER, 166 .type = ERR_TYPE_LEXER,
168 .value = ERR_UNMATCHED_STRING, 167 .value = ERR_UNMATCHED_STRING,
169 .line = line, 168 .line = line,
@@ -178,9 +177,9 @@ tokenize(const StringView *sv) {
178 }, 177 },
179 .type = TOKEN_STRING, 178 .type = TOKEN_STRING,
180 .line = line, 179 .line = line,
181 .column = col, 180 .col = col,
182 }; 181 };
183 array_push(tokens.tokens, token); 182 array_push(tokens, token);
184 } break; 183 } break;
185 case '(': { 184 case '(': {
186 if (scan_peek(&scanner) == ')') { 185 if (scan_peek(&scanner) == ')') {
@@ -188,25 +187,25 @@ tokenize(const StringView *sv) {
188 Token token = (Token){ 187 Token token = (Token){
189 .type = TOKEN_NIL, 188 .type = TOKEN_NIL,
190 .line = line, 189 .line = line,
191 .column = col, 190 .col = col,
192 }; 191 };
193 array_push(tokens.tokens, token); 192 array_push(tokens, token);
194 } else { 193 } else {
195 Token token = (Token){ 194 Token token = (Token){
196 .type = TOKEN_LPAREN, 195 .type = TOKEN_LPAREN,
197 .line = line, 196 .line = line,
198 .column = col, 197 .col = col,
199 }; 198 };
200 array_push(tokens.tokens, token); 199 array_push(tokens, token);
201 } 200 }
202 } break; 201 } break;
203 case ')': { 202 case ')': {
204 Token token = (Token){ 203 Token token = (Token){
205 .type = TOKEN_RPAREN, 204 .type = TOKEN_RPAREN,
206 .line = line, 205 .line = line,
207 .column = col, 206 .col = col,
208 }; 207 };
209 array_push(tokens.tokens, token); 208 array_push(tokens, token);
210 } break; 209 } break;
211 default: { 210 default: {
212 size_t n = 1; 211 size_t n = 1;
@@ -224,10 +223,10 @@ tokenize(const StringView *sv) {
224 }, 223 },
225 .type = TOKEN_SYMBOL, 224 .type = TOKEN_SYMBOL,
226 .line = line, 225 .line = line,
227 .column = col, 226 .col = col,
228 }; 227 };
229 token.type = find_primitive_type(token.value); 228 token.type = find_primitive_type(token.value);
230 array_push(tokens.tokens, token); 229 array_push(tokens, token);
231 } break; 230 } break;
232 } 231 }
233 } 232 }
@@ -236,9 +235,9 @@ tokenize(const StringView *sv) {
236 Token token = (Token){ 235 Token token = (Token){
237 .type = TOKEN_EOF, 236 .type = TOKEN_EOF,
238 .line = scanner.line_number, 237 .line = scanner.line_number,
239 .column = 1, 238 .col = 1,
240 }; 239 };
241 array_push(tokens.tokens, token); 240 array_push(tokens, token);
242 241
243 return tokens; 242 return tokens;
244} 243}
diff --git a/src/lexer.h b/src/lexer.h
index e56f5f2..43898bd 100755
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -26,14 +26,9 @@ typedef struct Token {
26 TokenType type; 26 TokenType type;
27 StringView value; 27 StringView value;
28 size_t line; 28 size_t line;
29 size_t column; 29 size_t col;
30} Token; 30} Token;
31 31
32typedef struct Tokens {
33 Token *tokens;
34 Errors errors;
35} Tokens;
36
37typedef struct Scanner { 32typedef struct Scanner {
38 StringView current; 33 StringView current;
39 size_t line_number; 34 size_t line_number;
@@ -62,6 +57,6 @@ bool is_delimiter(char c);
62TokenType find_primitive_type(const StringView value); 57TokenType find_primitive_type(const StringView value);
63 58
64// Generate a list of tokens from the given string. 59// Generate a list of tokens from the given string.
65Tokens tokenize(const StringView *sv); 60Token * tokenize(const StringView *sv, Errors *errors);
66 61
67#endif // BDL_LEXER_H 62#endif // BDL_LEXER_H
diff --git a/src/main.c b/src/main.c
index 90860e8..c734916 100755
--- a/src/main.c
+++ b/src/main.c
@@ -7,6 +7,7 @@
7#include "string_view.c" 7#include "string_view.c"
8#include "errors.c" 8#include "errors.c"
9#include "lexer.c" 9#include "lexer.c"
10#include "parser.c"
10 11
11void 12void
12init(void) { 13init(void) {
@@ -20,20 +21,32 @@ halt(void) {
20 21
21void 22void
22process_source(const StringView *source, const char *file_name) { 23process_source(const StringView *source, const char *file_name) {
24 Errors errors = {0};
25
23 // Read tokens. 26 // Read tokens.
24 Tokens tokens = tokenize(source); 27 Token *tokens = tokenize(source, &errors);
25 if (tokens.errors.n != 0) { 28 if (errors.n != 0) {
26 report_errors(&tokens.errors, file_name); 29 report_errors(&errors, file_name);
30 array_free(tokens);
31 exit(EXIT_FAILURE);
32 }
33
34 // Parser.
35 Root *roots = parse(tokens, &errors);
36 if (errors.n != 0) {
37 report_errors(&errors, file_name);
38 free_roots(roots);
39 array_free(tokens);
27 exit(EXIT_FAILURE); 40 exit(EXIT_FAILURE);
28 } 41 }
42 array_free(tokens);
29 43
30 // TODO: Parser.
31 // TODO: Semantic analysis. 44 // TODO: Semantic analysis.
32 // TODO: Optimization. 45 // TODO: Optimization.
33 // TODO: Compilation. 46 // TODO: Compilation.
34 47
35 // Free resources. 48 // Free resources.
36 array_free(tokens.tokens); 49 free_roots(roots);
37} 50}
38 51
39void 52void
diff --git a/src/parser.c b/src/parser.c
new file mode 100644
index 0000000..2d70c9d
--- /dev/null
+++ b/src/parser.c
@@ -0,0 +1,257 @@
1#include "parser.h"
2#include "darray.h"
3
4Token
5peek_token(const Parser *parser) {
6 return parser->tokens[parser->current];
7}
8
9Token
10next_token(Parser *parser) {
11 return parser->tokens[parser->current++];
12}
13
14Token
15previous_token(Parser *parser) {
16 return parser->tokens[parser->current - 1];
17}
18
19Token
20rewind_token(Parser *parser) {
21 return parser->tokens[--parser->current];
22}
23
24bool
25has_next_token(const Parser *parser) {
26 return parser->current < array_size(parser->tokens)
27 && peek_token(parser).type != TOKEN_EOF;
28}
29
30Object *
31parse_fixnum(Token tok) {
32 ssize_t num = 0;
33 ssize_t sign = 1;
34 for (size_t i = 0; i < tok.value.n; i++) {
35 char c = tok.value.start[i];
36 if (c == '-') {
37 sign = -1;
38 continue;
39 }
40 num = num * 10 + (c - '0');
41 }
42 Object *ret = object_alloc(tok, OBJ_TYPE_FIXNUM);
43 ret->fixnum = num * sign;
44 return ret;
45}
46
47Object *
48parse_bool(Token tok) {
49 ObjectType type = tok.type == TOKEN_TRUE ? OBJ_TYPE_TRUE : OBJ_TYPE_FALSE;
50 Object *ret = object_alloc(tok, type);
51 return ret;
52}
53
54Object *
55parse_string(Token tok) {
56 Object *ret = object_alloc(tok, OBJ_TYPE_STRING);
57 array_init(ret->text, tok.value.n);
58 array_insert(ret->text, tok.value.start, tok.value.n);
59 return ret;
60}
61
62Object *
63parse_symbol(Token tok) {
64 Object *ret = object_alloc(tok, OBJ_TYPE_SYMBOL);
65 array_init(ret->text, tok.value.n);
66 array_insert(ret->text, tok.value.start, tok.value.n);
67 return ret;
68}
69
70Object *
71parse_list(Parser *parser, Errors *errors) {
72 if (errors->n != 0) {
73 return NULL;
74 }
75 Token start = previous_token(parser);
76 Object *root = object_alloc(start, OBJ_TYPE_PAIR);
77 root->car = NULL;
78 root->cdr = NULL;
79 Object *current = root;
80 while (has_next_token(parser)) {
81 current->car = parse_tree(parser, errors);
82 if (errors->n != 0 || current->car == NULL) {
83 object_free(root);
84 return NULL;
85 }
86
87 Token tok = peek_token(parser);
88 if (tok.type == TOKEN_RPAREN) {
89 next_token(parser);
90 return root;
91 }
92
93 Object *next = object_alloc(start, OBJ_TYPE_PAIR);
94 next->car = NULL;
95 next->cdr = NULL;
96 current->cdr = next;
97 current = current->cdr;
98 }
99 object_free(root);
100 error_push(errors, (Error){
101 .type = ERR_TYPE_PARSER,
102 .value = ERR_UNBALANCED_PAREN,
103 .line = start.line,
104 .col = start.col,
105 });
106 return NULL;
107}
108
109Object *
110parse_tree(Parser *parser, Errors *errors) {
111 Token tok = next_token(parser);
112 if (errors->n != 0) {
113 return NULL;
114 }
115 switch (tok.type) {
116 case TOKEN_FIXNUM: {
117 return parse_fixnum(tok);
118 } break;
119 case TOKEN_TRUE:
120 case TOKEN_FALSE: {
121 return parse_bool(tok);
122 } break;
123 case TOKEN_RPAREN: {
124 error_push(errors, (Error){
125 .type = ERR_TYPE_PARSER,
126 .value = ERR_UNBALANCED_PAREN,
127 .line = tok.line,
128 .col = tok.col,
129 });
130 return NULL;
131 } break;
132 case TOKEN_LPAREN: {
133 return parse_list(parser, errors);
134 } break;
135 case TOKEN_STRING: {
136 return parse_string(tok);
137 } break;
138 case TOKEN_SYMBOL: {
139 return parse_symbol(tok);
140 } break;
141 case TOKEN_NIL: {
142 return object_alloc(tok, OBJ_TYPE_NIL);
143 } break;
144 default: {
145 break;
146 } break;
147 }
148 error_push(errors, (Error){
149 .type = ERR_TYPE_PARSER,
150 .value = ERR_EOF_REACHED,
151 .line = tok.line,
152 .col = tok.col,
153 });
154 return NULL;
155}
156
157Root *
158parse(Token *tokens, Errors *errors) {
159 Root *roots = NULL;
160 array_init(roots, 0);
161 Parser parser = {
162 .tokens = tokens,
163 .current = 0,
164 };
165 while (has_next_token(&parser)) {
166 Object *root = parse_tree(&parser, errors);
167 OBJ_PRINT(root);
168 if (errors->n != 0) {
169 break;
170 }
171 array_push(roots, root);
172 }
173 return roots;
174}
175
176Object *
177object_alloc(Token tok, ObjectType type) {
178 Object *node = malloc(sizeof(Object));
179 node->line = tok.line;
180 node->col = tok.col;
181 node->type = type;
182 return node;
183}
184
185void
186object_free(Object *node) {
187 if (node == NULL) {
188 return;
189 }
190 if (IS_STRING(node) || IS_SYMBOL(node)) {
191 array_free(node->text);
192 }
193 if (IS_PAIR(node)) {
194 object_free(node->car);
195 object_free(node->cdr);
196 }
197 free(node);
198}
199
200void
201free_roots(Root *roots) {
202 if (roots != NULL) {
203 for (size_t i = 0; i < array_size(roots); i++) {
204 object_free(roots[i]);
205 }
206 array_free(roots);
207 }
208}
209
210void
211display_pair(Object *obj) {
212 object_display(obj->car);
213 if (obj->cdr == NULL) {
214 return;
215 }
216 if (IS_PAIR(obj->cdr)) {
217 printf(" ");
218 display_pair(obj->cdr);
219 } else {
220 printf(" . ");
221 object_display(obj->cdr);
222 }
223}
224
225void
226object_display(Object *obj) {
227 if (obj == NULL) {
228 printf("#{error}");
229 return;
230 }
231 switch (obj->type) {
232 case OBJ_TYPE_FIXNUM: {
233 printf("%zd", obj->fixnum);
234 } break;
235 case OBJ_TYPE_TRUE: {
236 printf("true");
237 } break;
238 case OBJ_TYPE_FALSE: {
239 printf("false");
240 } break;
241 case OBJ_TYPE_NIL: {
242 printf("()");
243 } break;
244 case OBJ_TYPE_STRING: {
245 printf("\"%.*s\"", (int)array_size(obj->text), obj->text);
246 } break;
247 case OBJ_TYPE_SYMBOL: {
248 printf(":%.*s", (int)array_size(obj->text), obj->text);
249 } break;
250 case OBJ_TYPE_PAIR: {
251 printf("(");
252 display_pair(obj);
253 printf(")");
254 } break;
255 }
256 return;
257}
diff --git a/src/parser.h b/src/parser.h
new file mode 100755
index 0000000..ee5febe
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,84 @@
1#ifndef BDL_PARSER_H
2#define BDL_PARSER_H
3
4#include "lexer.h"
5
6typedef enum ObjectType {
7 OBJ_TYPE_NIL,
8 OBJ_TYPE_TRUE,
9 OBJ_TYPE_FALSE,
10 OBJ_TYPE_FIXNUM,
11 OBJ_TYPE_SYMBOL,
12 OBJ_TYPE_STRING,
13 OBJ_TYPE_PAIR,
14} ObjectType;
15
16typedef struct Object {
17 ObjectType type;
18 union {
19 // OBJ_TYPE_FIXNUM
20 ssize_t fixnum;
21
22 // OBJ_TYPE_STRING
23 // OBJ_TYPE_SYMBOL
24 char *text;
25
26 // OBJ_TYPE_PAIR
27 struct {
28 struct Object *car;
29 struct Object *cdr;
30 };
31 };
32
33 size_t line;
34 size_t col;
35} Object;
36
37typedef struct Parser {
38 Token *tokens;
39 size_t current;
40} Parser;
41
42typedef Object* Root;
43
44// Mimics the functionality in the Scanner functions, but for tokens.
45Token next_token(Parser *parser);
46Token previous_token(Parser *parser);
47Token rewind_token(Parser *parser);
48Token peek_token(const Parser *parser);
49bool has_next_token(const Parser *parser);
50
51// Parsing operations.
52Object * parse_tree(Parser *parser, Errors *errors);
53Object * parse_symbol(Token tok);
54Object * parse_string(Token tok);
55Object * parse_bool(Token tok);
56Object * parse_fixnum(Token tok);
57Object * parse_list(Parser *parser, Errors *errors);
58Root * parse(Token *tokens, Errors *errors);
59
60// Object operations.
61void object_display(Object *obj);
62
63// Manage resources.
64Object * object_alloc(Token tok, ObjectType type);
65void object_free(Object *node);
66void free_roots(Root *roots);
67
68//
69// Helper macros.
70//
71
72// Type checking.
73#define IS_NIL(VAL) ((VAL)->type == OBJ_TYPE_NIL)
74#define IS_TRUE(VAL) ((VAL)->type != OBJ_TYPE_FALSE)
75#define IS_FALSE(VAL) ((VAL)->type == OBJ_TYPE_FALSE)
76#define IS_FIXNUM(VAL) ((VAL)->type == OBJ_TYPE_FIXNUM)
77#define IS_STRING(VAL) ((VAL)->type == OBJ_TYPE_STRING)
78#define IS_SYMBOL(VAL) ((VAL)->type == OBJ_TYPE_SYMBOL)
79#define IS_PAIR(VAL) ((VAL)->type == OBJ_TYPE_PAIR)
80
81// Debug.
82#define OBJ_PRINT(OBJ) object_display(OBJ); printf("\n");
83
84#endif // BDL_PARSER_H