author     Bad Diode <bd@badd10de.dev>    2024-06-16 15:00:19 +0200
committer  Bad Diode <bd@badd10de.dev>    2024-06-16 15:00:19 +0200
commit     2e7f414c65d89ebe52570b0b0fb9b7ff2585bf96 (patch)
tree       61a0178c877e69f674daff465fba30af76b379ab
parent     c2c8796511c90930c41700f5fcd2043a5c4405c9 (diff)
Add graphviz visualization for the parse tree
-rw-r--r--  Makefile               |  29
-rw-r--r--  src/badlib.h           |   3
-rw-r--r--  src/lexer.c            |  16
-rw-r--r--  src/main.c             | 249
-rw-r--r--  tests/expressions.bad  |  11
5 files changed, 258 insertions(+), 50 deletions(-)
diff --git a/Makefile b/Makefile
index 987ecd8..35447d3 100644
--- a/Makefile
+++ b/Makefile
@@ -5,10 +5,17 @@
 SRC_DIR := src
 BUILD_DIR := build
 SRC_MAIN := $(SRC_DIR)/main.c
+SRC_BAD := tests/expressions.bad
 WATCH_SRC := $(shell find $(SRC_DIR) -name "*.c" -or -name "*.s" -or -name "*.h")
 INC_DIRS := $(shell find $(SRC_DIR) -type d)
 INC_FLAGS := $(addprefix -I,$(INC_DIRS))
 
+# Graphviz viz command.
+DOT := dot -Gmargin=0.7 -Gcolor=white -Gfontcolor=white \
+       -Ncolor=white -Nfontcolor=white -Ecolor=white \
+       -Nfontname="Iosevka Nerd Font" -Nfontsize=15 \
+       -T png | kitty +kitten icat
+
 # Output executable.
 TARGET := bdl
 BIN := $(BUILD_DIR)/$(TARGET)
@@ -44,19 +51,23 @@ $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
 
 run: $(BIN)
-	$(BIN) tests/expressions.bad
+	$(BIN) $(SRC_BAD)
 
-viz_lex: $(BIN)
-	$(BIN) -pl example.bdl
+graph-tokens: $(BIN)
+	# TODO: ...
+	$(BIN) -pl $(SRC_BAD)
 
-viz_par: $(BIN)
-	$(BIN) -pp example.bdl | idot
+graph-parse: $(BIN)
+	@echo "parsing tree for: '$(SRC_BAD)'"
+	@$(BIN) -pp $(SRC_BAD) | $(DOT)
 
-viz_sem: $(BIN)
-	$(BIN) -ps example.bdl | idot
+graph-semantic: $(BIN)
+	@echo "semantic tree for: '$(SRC_BAD)'"
+	@$(BIN) -ps $(SRC_BAD) | $(DOT)
 
-viz_sym: $(BIN)
-	$(BIN) -pt example.bdl | idot
+graph-symbols: $(BIN)
+	@echo "symbol table for: '$(SRC_BAD)'"
+	@$(BIN) -pt $(SRC_BAD) | $(DOT)
 
 # Remove build directory.
 clean:
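
Note: the DOT pipeline above ends in `-T png | kitty +kitten icat`, so the graph-* targets render the picture inline and therefore assume a kitty terminal. On other terminals a reasonable substitute (an assumption, not part of this commit) is to drop the icat pipe and let dot write the image to disk instead, e.g. ending the command with `-T png -o parse.png`.
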
diff --git a/src/badlib.h b/src/badlib.h
index 71f4bee..4944d2e 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -832,6 +832,9 @@ _array_maybe_grow(void *arr, sz type_size, Arena *a) {
 
 static inline char *
 _array_insert(char *arr, const char *src, sz n_bytes, sz type_size, Arena *a) {
+    if (!arr) {
+        arr = _array_reserve(0, 0, a);
+    }
     ArrayHeader *head = array_head(arr);
     sz new_size = n_bytes + head->size;
     if (new_size > head->cap * type_size) {
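
The new NULL guard makes _array_insert usable on an array that was never initialized: the first insert reserves backing storage instead of dereferencing a missing header. A minimal sketch of what that enables, assuming badlib's array_push/array_size macros route through _array_insert the same way parser.nodes does in main.c:

    #include "badlib.h"

    static void
    lazy_array_example(Arena *a) {
        int *xs = NULL;           // no array_init needed before the first push
        array_push(xs, 10, a);    // first push now reserves storage via the guard
        array_push(xs, 20, a);
        println("len = %d", array_size(xs));   // len = 2
    }
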
diff --git a/src/lexer.c b/src/lexer.c
index df998f2..6a5621c 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,6 +1,8 @@
+#include "badlib.h"
+
 #define LEXER_MEM GB(2)
 
-typedef enum TokenType {
+typedef enum TokenKind {
     TOK_UNKNOWN = 0,
 
     // Parentheses.
@@ -65,7 +67,7 @@ typedef enum TokenType {
 
     // End of file.
     TOK_EOF,
-} TokenType;
+} TokenKind;
 
 Str token_str[] = {
     [TOK_UNKNOWN] = cstr("UNKNOWN"),
@@ -135,7 +137,7 @@ Str token_str[] = {
 };
 
 typedef struct Token {
-    TokenType type;
+    TokenKind kind;
     Str val;
     sz line;
     sz col;
@@ -256,7 +258,7 @@ scan_skip_until_valid(Scanner *scanner) {
 }
 
 Token
-emit_token(Scanner current, Scanner *scanner, TokenType t) {
+emit_token(Scanner current, Scanner *scanner, TokenKind t) {
     Str val = current.str;
     val.size = current.str.size - scanner->str.size;
     val.size = val.size < 0 ? 0 : val.size;
@@ -264,7 +266,7 @@ emit_token(Scanner current, Scanner *scanner, TokenType t) {
         .val = val,
         .line = current.line + 1,
         .col = current.col + 1,
-        .type = t,
+        .kind = t,
     };
 }
 
@@ -274,7 +276,7 @@ emit_token_err(Scanner *scanner, Str err_msg) {
         .line = scanner->line + 1,
         .col = scanner->col + 1,
         .val = err_msg,
-        .type = TOK_UNKNOWN,
+        .kind = TOK_UNKNOWN,
     };
 }
 
@@ -430,7 +432,7 @@ scan_token(Scanner *scanner) {
                 *scanner = current;
                 return emit_token_number(scanner);
             }
-            return emit_token(current, scanner, TOK_ADD);
+            return emit_token(current, scanner, TOK_SUB);
         };
         case '*':
             return emit_token(current, scanner, TOK_MUL);
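
Besides the TokenType -> TokenKind rename, the last hunk fixes a copy-paste bug: a '-' that does not start a numeric literal used to be emitted as TOK_ADD. A rough check of the new behaviour (a sketch; it assumes cstr() builds the Str fed to the Scanner, as process_file does with the file contents, and that '-' followed by a space is lexed as an operator):

    Scanner s = {.str = cstr("5 - 2")};
    Token a = scan_token(&s);   // TOK_NUMBER "5"
    Token b = scan_token(&s);   // TOK_SUB    "-"   (previously came back as TOK_ADD)
    Token c = scan_token(&s);   // TOK_NUMBER "2"
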
diff --git a/src/main.c b/src/main.c
index 44a7bf1..cea7a45 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,7 +22,7 @@ init(void) {
 
 void
 print_token(Token tok) {
-    println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.type], tok.val);
+    println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val);
 }
 
 void
@@ -34,13 +34,113 @@ print_tokens(Str path, Token *tokens) {
     }
 }
 
+//
+// Nodes
+//
+
+typedef enum NodeKind {
+    NODE_NUMBER,
+    // TODO: probably want to handle ints/unsigneds/floats separately.
+    NODE_ADD,
+    NODE_SUB,
+    NODE_DIV,
+    NODE_MUL,
+    // TODO: MOD
+} NodeKind;
+
+Str node_str[] = {
+    [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"),
+    [NODE_SUB] = cstr("SUB"),    [NODE_DIV] = cstr("DIV"),
+    [NODE_MUL] = cstr("MUL"),
+};
+
+typedef struct Node {
+    sz id;
+    sz line;
+    sz col;
+
+    NodeKind kind;
+    union {
+        f64 d;
+        f32 f;
+        sz i;
+        u64 u;
+    } value;
+    struct Node *left;
+    struct Node *right;
+} Node;
+
+Node *
+node_alloc(NodeKind kind, Token tok, Arena *a) {
+    static sz id = 0;
+    Node *node = arena_calloc((sz)sizeof(Node), a);
+    node->id = id++;
+    node->kind = kind;
+    node->line = tok.line;
+    node->col = tok.col;
+    return node;
+}
+
+void
+print_node(Node *node) {
+    if (!node) {
+        return;
+    }
+    switch (node->kind) {
+        case NODE_NUMBER: {
+            print("%d", node->value.i);
+        } break;
+        case NODE_ADD: {
+            print("(+ ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_SUB: {
+            print("(- ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_DIV: {
+            print("(/ ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_MUL: {
+            print("(* ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        default: {
+            eprintln("error: unknown node kind: %d", node->kind);
+        } break;
+    }
+}
+
+//
+// Parsing.
+//
+
 typedef struct Parser {
     Token current;
     Token previous;
     Token *tokens;
     sz idx;
+
+    // Error handling.
     bool err;
     bool panic;
+
+    // Storage.
+    Node **nodes;
+    Arena *storage;
 } Parser;
 
 typedef enum {
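
Node is a plain binary tree tagged with a NodeKind plus the line/column of the token that produced it; parse_binary below builds these bottom-up through the parser's node stack. A hand-wired sketch of the same shape (tok and a are hypothetical stand-ins for a real token and an Arena *):

    Token tok = {.line = 1, .col = 1};            // hypothetical source position
    Node *lhs = node_alloc(NODE_NUMBER, tok, a);  // leaf: 1
    lhs->value.i = 1;
    Node *rhs = node_alloc(NODE_NUMBER, tok, a);  // leaf: 2
    rhs->value.i = 2;
    Node *sum = node_alloc(NODE_ADD, tok, a);     // interior node: 1 + 2
    sum->left = lhs;
    sum->right = rhs;
    print_node(sum);                              // prints "(+ 1 2)"
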
@@ -80,6 +180,7 @@ ParseRule parse_rules[] = {
     [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR},
     [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR},
     [TOK_NUMBER] = {parse_number, NULL, PREC_NONE},
+    [TOK_EOF] = {NULL, NULL, PREC_NONE},
 };
 
 void
@@ -91,9 +192,9 @@ parse_emit_err(Parser *parser, Token token, Str msg) {
     parser->err = true;
     eprint("%d:%d: error: %s", token.line, token.col, msg);
 
-    if (token.type == TOK_EOF) {
+    if (token.kind == TOK_EOF) {
         eprintln(" -> at end of the file");
-    } else if (token.type != TOK_UNKNOWN) {
+    } else if (token.kind != TOK_UNKNOWN) {
         eprintln(" -> at %s", token.val);
     }
 }
@@ -106,8 +207,8 @@ parse_advance(Parser *parser) {
 }
 
 static void
-parse_consume(Parser *parser, TokenType type, Str msg) {
-    if (parser->current.type == type) {
+parse_consume(Parser *parser, TokenKind kind, Str msg) {
+    if (parser->current.kind == kind) {
         parse_advance(parser);
         return;
     }
@@ -117,16 +218,16 @@ parse_consume(Parser *parser, TokenType type, Str msg) {
 static void
 parse_prec(Parser *parser, ParsePrecedence precedence) {
     parse_advance(parser);
-    ParseFn prefix = parse_rules[parser->previous.type].prefix;
+    ParseFn prefix = parse_rules[parser->previous.kind].prefix;
     if (prefix == NULL) {
         parse_emit_err(parser, parser->previous, cstr("expected expression"));
         return;
     }
     prefix(parser);
 
-    while (precedence <= parse_rules[parser->current.type].precedence) {
+    while (precedence <= parse_rules[parser->current.kind].precedence) {
         parse_advance(parser);
-        ParseFn infix = parse_rules[parser->previous.type].infix;
+        ParseFn infix = parse_rules[parser->previous.kind].infix;
         if (infix == NULL) {
             parse_emit_err(parser, parser->previous,
                            cstr("expected expression"));
@@ -138,13 +239,15 @@ parse_prec(Parser *parser, ParsePrecedence precedence) {
 
 void
 parse_unary(Parser *parser) {
+#if DEBUG == 1
     print("parsing unary ");
     print_token(parser->previous);
+#endif
     parse_prec(parser, PREC_ASSIGNMENT);
-    TokenType type = parser->previous.type;
+    TokenKind kind = parser->previous.kind;
     parse_expr(parser);
     // TODO: ...
-    switch (type) {
+    switch (kind) {
         // case TOKEN_MINUS: emitByte(OP_NEGATE); break;
         default:
             return; // Unreachable.
@@ -153,47 +256,113 @@ parse_unary(Parser *parser) {
 
 void
 parse_binary(Parser *parser) {
+#if DEBUG == 1
     print("parsing binary ");
     print_token(parser->previous);
-    TokenType operatorType = parser->previous.type;
-    ParseRule rule = parse_rules[operatorType];
+#endif
+    TokenKind kind = parser->previous.kind;
+    ParseRule rule = parse_rules[kind];
     parse_prec(parser, rule.precedence + 1);
 
-    // TODO: ...
-    // switch (operatorType) {
-    //     case TOKEN_PLUS: emitByte(OP_ADD); break;
-    //     case TOKEN_MINUS: emitByte(OP_SUBTRACT); break;
-    //     case TOKEN_STAR: emitByte(OP_MULTIPLY); break;
-    //     case TOKEN_SLASH: emitByte(OP_DIVIDE); break;
-    //     default: return; // Unreachable.
-    // }
+    Node *node;
+    switch (kind) {
+        case TOK_ADD: {
+            node = node_alloc(NODE_ADD, parser->previous, parser->storage);
+        } break;
+        case TOK_SUB: {
+            node = node_alloc(NODE_SUB, parser->previous, parser->storage);
+        } break;
+        case TOK_MUL: {
+            node = node_alloc(NODE_MUL, parser->previous, parser->storage);
+        } break;
+        case TOK_DIV: {
+            node = node_alloc(NODE_DIV, parser->previous, parser->storage);
+        } break;
+        default: {
+            parse_emit_err(parser, parser->previous, cstr("unreachable"));
+            return;
+        }
+    }
+    node->right = array_pop(parser->nodes);
+    node->left = array_pop(parser->nodes);
+    array_push(parser->nodes, node, parser->storage);
 }
 
 void
 parse_number(Parser *parser) {
+#if DEBUG == 1
     print("parsing number ");
     print_token(parser->previous);
-    // TODO: ...
-    // double value = strtod(parser.previous.start, NULL);
-    // emitConstant(value);
+#endif
+    Node *node = node_alloc(NODE_NUMBER, parser->previous, parser->storage);
+    node->value.i = str_to_int(parser->previous.val);
+    // TODO: handle sign and/or floating point values.
+    array_push(parser->nodes, node, parser->storage);
 }
 
 void
 parse_grouping(Parser *parser) {
+#if DEBUG == 1
     print("parsing group ");
     print_token(parser->previous);
+#endif
     parse_expr(parser);
     parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression"));
 }
 
 void
 parse_expr(Parser *parser) {
-    // TODO: ...
     // Can this be prec_none instead?
     parse_prec(parser, PREC_ASSIGNMENT);
 }
 
 void
+graph_node(Node *node) {
+    if (node == NULL) {
+        return;
+    }
+    print("%d [width=2.5,shape=Mrecord,label=\"", node->id);
+    print("<top> %s ", node_str[node->kind]);
+    switch (node->kind) {
+        case NODE_NUMBER: {
+            print("| Value: %d", node->value.i);
+        } break;
+        default:
+            break;
+    }
+    print("| Line: %d | Col: %d ", node->line, node->col);
+    println("\"];");
+    if (node->left) {
+        println("%d:top:e->%d:top:w;", node->id, node->left->id);
+        graph_node(node->left);
+    }
+    if (node->right) {
+        println("%d:top:e->%d:top:w;", node->id, node->right->id);
+        graph_node(node->right);
+    }
+}
+
+void
+graph_ast(Node **roots) {
+    if (roots == NULL) {
+        return;
+    }
+    println("digraph ast {");
+    println("rankdir=LR;");
+    println("ranksep=\"0.95 equally\";");
+    println("nodesep=\"0.5 equally\";");
+    println("overlap=scale;");
+    println("bgcolor=\"transparent\";");
+    for (sz i = 0; i < array_size(roots); ++i) {
+        println("subgraph %d {", i);
+        Node *root = roots[array_size(roots) - 1 - i];
+        graph_node(root);
+        println("}");
+    }
+    println("}");
+}
+
+void
 process_file(Str path) {
     Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
 
@@ -208,11 +377,11 @@ process_file(Str path) {
     Scanner scanner = {.str = file.data};
     Token *tokens = NULL;
     Token tok = {0};
-    while (tok.type != TOK_EOF) {
+    while (tok.kind != TOK_EOF) {
         tok = scan_token(&scanner);
-        if (tok.type == TOK_UNKNOWN) {
+        if (tok.kind == TOK_UNKNOWN) {
             eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col,
-                     token_str[tok.type], tok.val);
+                     token_str[tok.kind], tok.val);
             errors++;
             continue;
         }
@@ -225,11 +394,31 @@ process_file(Str path) {
     }
 
     // Parser.
-    Parser parser = {.tokens = tokens};
+    Parser parser = {
+        .tokens = tokens,
+        .storage = &lexer_arena,
+    };
+    array_init(parser.nodes, 256, parser.storage);
     parse_advance(&parser);
-    parse_expr(&parser);
-    parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
+    while (parser.current.kind != TOK_EOF) {
+        parse_expr(&parser);
+    }
+    if (parser.err) {
+        goto stop;
+    }
+    if (mode == PRINT_PARSE) {
+        graph_ast(parser.nodes);
+        goto stop;
+    }
 
+    sz n_roots = array_size(parser.nodes);
+    for (sz i = 0; i < n_roots; i++) {
+        // The parser stores the root nodes as a stack.
+        Node *root = parser.nodes[i];
+        print_node(root); println("");
+        (void)root;
+    }
+    parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
     // print_tokens(path, tokens); // DEBUG
 
 stop:
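
With -pp, process_file now hands the root stack to graph_ast, which emits one DOT subgraph per root and connects each node's <top> port to its children; the graph-parse target pipes that output through $(DOT). For a lone literal 5 the emitted record would look roughly like this (per the format strings in graph_node, with ids taken from the static counter in node_alloc):

    0 [width=2.5,shape=Mrecord,label="<top> NUM | Value: 5| Line: 1 | Col: 1 "];
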
diff --git a/tests/expressions.bad b/tests/expressions.bad
index c992a7a..af60b9b 100644
--- a/tests/expressions.bad
+++ b/tests/expressions.bad
@@ -1,4 +1,7 @@
-1 + 2 * 5 - 2
-; 1 + 2 * (5 - 2)
-; (1 + 2 * (5 - 2))
-; (1 + 2 * 5 - 2)
+1 + 2
+
+1 + 2 * 3
+
+1 + 2 * 3 - 4
+
+1 + 2 * (3 - 4)
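
When -pp is not given, each root now goes through print_node, so the four test expressions should print as s-expressions that make the precedence explicit, roughly (assuming the usual Pratt setup where * and / at PREC_FACTOR bind tighter than + and -):

    (+ 1 2)
    (+ 1 (* 2 3))
    (- (+ 1 (* 2 3)) 4)
    (+ 1 (* 2 (- 3 4)))
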