author     Bad Diode <bd@badd10de.dev>    2024-06-16 15:00:19 +0200
committer  Bad Diode <bd@badd10de.dev>    2024-06-16 15:00:19 +0200
commit     2e7f414c65d89ebe52570b0b0fb9b7ff2585bf96 (patch)
tree       61a0178c877e69f674daff465fba30af76b379ab
parent     c2c8796511c90930c41700f5fcd2043a5c4405c9 (diff)
Add graphviz visualization for the parse tree
-rw-r--r--  Makefile               |  29
-rw-r--r--  src/badlib.h           |   3
-rw-r--r--  src/lexer.c            |  16
-rw-r--r--  src/main.c             | 249
-rw-r--r--  tests/expressions.bad  |  11
5 files changed, 258 insertions(+), 50 deletions(-)
diff --git a/Makefile b/Makefile
index 987ecd8..35447d3 100644
--- a/Makefile
+++ b/Makefile
@@ -5,10 +5,17 @@
 SRC_DIR := src
 BUILD_DIR := build
 SRC_MAIN := $(SRC_DIR)/main.c
+SRC_BAD := tests/expressions.bad
 WATCH_SRC := $(shell find $(SRC_DIR) -name "*.c" -or -name "*.s" -or -name "*.h")
 INC_DIRS := $(shell find $(SRC_DIR) -type d)
 INC_FLAGS := $(addprefix -I,$(INC_DIRS))
 
+# Graphviz viz command.
+DOT := dot -Gmargin=0.7 -Gcolor=white -Gfontcolor=white \
+       -Ncolor=white -Nfontcolor=white -Ecolor=white \
+       -Nfontname="Iosevka Nerd Font" -Nfontsize=15 \
+       -T png | kitty +kitten icat
+
 # Output executable.
 TARGET := bdl
 BIN := $(BUILD_DIR)/$(TARGET)
@@ -44,19 +51,23 @@ $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
 
 run: $(BIN)
-	$(BIN) tests/expressions.bad
+	$(BIN) $(SRC_BAD)
 
-viz_lex: $(BIN)
-	$(BIN) -pl example.bdl
+graph-tokens: $(BIN)
+	# TODO: ...
+	$(BIN) -pl $(SRC_BAD)
 
-viz_par: $(BIN)
-	$(BIN) -pp example.bdl | idot
+graph-parse: $(BIN)
+	@echo "parsing tree for: '$(SRC_BAD)'"
+	@$(BIN) -pp $(SRC_BAD) | $(DOT)
 
-viz_sem: $(BIN)
-	$(BIN) -ps example.bdl | idot
+graph-semantic: $(BIN)
+	@echo "semantic tree for: '$(SRC_BAD)'"
+	@$(BIN) -ps $(SRC_BAD) | $(DOT)
 
-viz_sym: $(BIN)
-	$(BIN) -pt example.bdl | idot
+graph-symbols: $(BIN)
+	@echo "symbol table for: '$(SRC_BAD)'"
+	@$(BIN) -pt $(SRC_BAD) | $(DOT)
 
 # Remove build directory.
 clean:
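
Note: the DOT pipeline above ends in `-T png | kitty +kitten icat`, so the graph-* targets render the picture inline and therefore assume a kitty terminal. On other terminals a reasonable substitute (an assumption, not part of this commit) is to drop the icat pipe and let dot write the image to disk instead, e.g. ending the command with `-T png -o parse.png`.
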
diff --git a/src/badlib.h b/src/badlib.h
index 71f4bee..4944d2e 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -832,6 +832,9 @@ _array_maybe_grow(void *arr, sz type_size, Arena *a) {
 
 static inline char *
 _array_insert(char *arr, const char *src, sz n_bytes, sz type_size, Arena *a) {
+    if (!arr) {
+        arr = _array_reserve(0, 0, a);
+    }
     ArrayHeader *head = array_head(arr);
     sz new_size = n_bytes + head->size;
     if (new_size > head->cap * type_size) {
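
The new NULL guard makes _array_insert usable on an array that was never initialized: the first insert reserves backing storage instead of dereferencing a missing header. A minimal sketch of what that enables, assuming badlib's array_push/array_size macros route through _array_insert the same way parser.nodes does in main.c:

    #include "badlib.h"

    static void
    lazy_array_example(Arena *a) {
        int *xs = NULL;           // no array_init needed before the first push
        array_push(xs, 10, a);    // first push now reserves storage via the guard
        array_push(xs, 20, a);
        println("len = %d", array_size(xs));   // len = 2
    }
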
diff --git a/src/lexer.c b/src/lexer.c
index df998f2..6a5621c 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -1,6 +1,8 @@
+#include "badlib.h"
+
 #define LEXER_MEM GB(2)
 
-typedef enum TokenType {
+typedef enum TokenKind {
     TOK_UNKNOWN = 0,
 
     // Parentheses.
@@ -65,7 +67,7 @@ typedef enum TokenType {
 
     // End of file.
     TOK_EOF,
-} TokenType;
+} TokenKind;
 
 Str token_str[] = {
     [TOK_UNKNOWN] = cstr("UNKNOWN"),
@@ -135,7 +137,7 @@ Str token_str[] = {
 };
 
 typedef struct Token {
-    TokenType type;
+    TokenKind kind;
     Str val;
     sz line;
     sz col;
@@ -256,7 +258,7 @@ scan_skip_until_valid(Scanner *scanner) {
 }
 
 Token
-emit_token(Scanner current, Scanner *scanner, TokenType t) {
+emit_token(Scanner current, Scanner *scanner, TokenKind t) {
     Str val = current.str;
     val.size = current.str.size - scanner->str.size;
     val.size = val.size < 0 ? 0 : val.size;
@@ -264,7 +266,7 @@ emit_token(Scanner current, Scanner *scanner, TokenType t) {
         .val = val,
         .line = current.line + 1,
         .col = current.col + 1,
-        .type = t,
+        .kind = t,
     };
 }
 
@@ -274,7 +276,7 @@ emit_token_err(Scanner *scanner, Str err_msg) {
         .line = scanner->line + 1,
         .col = scanner->col + 1,
         .val = err_msg,
-        .type = TOK_UNKNOWN,
+        .kind = TOK_UNKNOWN,
     };
 }
 
@@ -430,7 +432,7 @@ scan_token(Scanner *scanner) {
                 *scanner = current;
                 return emit_token_number(scanner);
             }
-            return emit_token(current, scanner, TOK_ADD);
+            return emit_token(current, scanner, TOK_SUB);
         };
         case '*':
             return emit_token(current, scanner, TOK_MUL);
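
Besides the TokenType -> TokenKind rename, the last hunk fixes a copy-paste bug: a '-' that does not start a numeric literal used to be emitted as TOK_ADD. A rough check of the new behaviour (a sketch; it assumes cstr() builds the Str fed to the Scanner, as process_file does with the file contents, and that '-' followed by a space is lexed as an operator):

    Scanner s = {.str = cstr("5 - 2")};
    Token a = scan_token(&s);   // TOK_NUMBER "5"
    Token b = scan_token(&s);   // TOK_SUB    "-"   (previously came back as TOK_ADD)
    Token c = scan_token(&s);   // TOK_NUMBER "2"
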
diff --git a/src/main.c b/src/main.c
index 44a7bf1..cea7a45 100644
--- a/src/main.c
+++ b/src/main.c
@@ -22,7 +22,7 @@ init(void) {
 
 void
 print_token(Token tok) {
-    println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.type], tok.val);
+    println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val);
 }
 
 void
@@ -34,13 +34,113 @@ print_tokens(Str path, Token *tokens) {
     }
 }
 
+//
+// Nodes
+//
+
+typedef enum NodeKind {
+    NODE_NUMBER,
+    // TODO: probably want to handle ints/unsigneds/floats separately.
+    NODE_ADD,
+    NODE_SUB,
+    NODE_DIV,
+    NODE_MUL,
+    // TODO: MOD
+} NodeKind;
+
+Str node_str[] = {
+    [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"),
+    [NODE_SUB] = cstr("SUB"),    [NODE_DIV] = cstr("DIV"),
+    [NODE_MUL] = cstr("MUL"),
+};
+
+typedef struct Node {
+    sz id;
+    sz line;
+    sz col;
+
+    NodeKind kind;
+    union {
+        f64 d;
+        f32 f;
+        sz i;
+        u64 u;
+    } value;
+    struct Node *left;
+    struct Node *right;
+} Node;
+
+Node *
+node_alloc(NodeKind kind, Token tok, Arena *a) {
+    static sz id = 0;
+    Node *node = arena_calloc((sz)sizeof(Node), a);
+    node->id = id++;
+    node->kind = kind;
+    node->line = tok.line;
+    node->col = tok.col;
+    return node;
+}
+
+void
+print_node(Node *node) {
+    if (!node) {
+        return;
+    }
+    switch (node->kind) {
+        case NODE_NUMBER: {
+            print("%d", node->value.i);
+        } break;
+        case NODE_ADD: {
+            print("(+ ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_SUB: {
+            print("(- ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_DIV: {
+            print("(/ ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        case NODE_MUL: {
+            print("(* ");
+            print_node(node->left);
+            print(" ");
+            print_node(node->right);
+            print(")");
+        } break;
+        default: {
+            eprintln("error: unknown node kind: %d", node->kind);
+        } break;
+    }
+}
+
+//
+// Parsing.
+//
+
 typedef struct Parser {
     Token current;
     Token previous;
     Token *tokens;
     sz idx;
+
+    // Error handling.
     bool err;
     bool panic;
+
+    // Storage.
+    Node **nodes;
+    Arena *storage;
 } Parser;
 
 typedef enum {
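
Node is a plain binary tree tagged with a NodeKind plus the line/column of the token that produced it; parse_binary below builds these bottom-up through the parser's node stack. A hand-wired sketch of the same shape (tok and a are hypothetical stand-ins for a real token and an Arena *):

    Token tok = {.line = 1, .col = 1};            // hypothetical source position
    Node *lhs = node_alloc(NODE_NUMBER, tok, a);  // leaf: 1
    lhs->value.i = 1;
    Node *rhs = node_alloc(NODE_NUMBER, tok, a);  // leaf: 2
    rhs->value.i = 2;
    Node *sum = node_alloc(NODE_ADD, tok, a);     // interior node: 1 + 2
    sum->left = lhs;
    sum->right = rhs;
    print_node(sum);                              // prints "(+ 1 2)"
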
@@ -80,6 +180,7 @@ ParseRule parse_rules[] = {
     [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR},
     [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR},
     [TOK_NUMBER] = {parse_number, NULL, PREC_NONE},
+    [TOK_EOF] = {NULL, NULL, PREC_NONE},
 };
 
 void
@@ -91,9 +192,9 @@ parse_emit_err(Parser *parser, Token token, Str msg) {
     parser->err = true;
     eprint("%d:%d: error: %s", token.line, token.col, msg);
 
-    if (token.type == TOK_EOF) {
+    if (token.kind == TOK_EOF) {
         eprintln(" -> at end of the file");
-    } else if (token.type != TOK_UNKNOWN) {
+    } else if (token.kind != TOK_UNKNOWN) {
         eprintln(" -> at %s", token.val);
     }
 }
@@ -106,8 +207,8 @@ parse_advance(Parser *parser) {
 }
 
 static void
-parse_consume(Parser *parser, TokenType type, Str msg) {
-    if (parser->current.type == type) {
+parse_consume(Parser *parser, TokenKind kind, Str msg) {
+    if (parser->current.kind == kind) {
         parse_advance(parser);
         return;
     }
@@ -117,16 +218,16 @@ parse_consume(Parser *parser, TokenType type, Str msg) {
 static void
 parse_prec(Parser *parser, ParsePrecedence precedence) {
     parse_advance(parser);
-    ParseFn prefix = parse_rules[parser->previous.type].prefix;
+    ParseFn prefix = parse_rules[parser->previous.kind].prefix;
     if (prefix == NULL) {
         parse_emit_err(parser, parser->previous, cstr("expected expression"));
         return;
     }
     prefix(parser);
 
-    while (precedence <= parse_rules[parser->current.type].precedence) {
+    while (precedence <= parse_rules[parser->current.kind].precedence) {
         parse_advance(parser);
-        ParseFn infix = parse_rules[parser->previous.type].infix;
+        ParseFn infix = parse_rules[parser->previous.kind].infix;
         if (infix == NULL) {
             parse_emit_err(parser, parser->previous,
                            cstr("expected expression"));
@@ -138,13 +239,15 @@ parse_prec(Parser *parser, ParsePrecedence precedence) {
 
 void
 parse_unary(Parser *parser) {
+#if DEBUG == 1
     print("parsing unary ");
     print_token(parser->previous);
+#endif
     parse_prec(parser, PREC_ASSIGNMENT);
-    TokenType type = parser->previous.type;
+    TokenKind kind = parser->previous.kind;
     parse_expr(parser);
     // TODO: ...
-    switch (type) {
+    switch (kind) {
         // case TOKEN_MINUS: emitByte(OP_NEGATE); break;
         default:
             return; // Unreachable.
@@ -153,47 +256,113 @@ parse_unary(Parser *parser) {
 
 void
 parse_binary(Parser *parser) {
+#if DEBUG == 1
     print("parsing binary ");
     print_token(parser->previous);
-    TokenType operatorType = parser->previous.type;
-    ParseRule rule = parse_rules[operatorType];
+#endif
+    TokenKind kind = parser->previous.kind;
+    ParseRule rule = parse_rules[kind];
     parse_prec(parser, rule.precedence + 1);
 
-    // TODO: ...
-    // switch (operatorType) {
-    //     case TOKEN_PLUS: emitByte(OP_ADD); break;
-    //     case TOKEN_MINUS: emitByte(OP_SUBTRACT); break;
-    //     case TOKEN_STAR: emitByte(OP_MULTIPLY); break;
-    //     case TOKEN_SLASH: emitByte(OP_DIVIDE); break;
-    //     default: return; // Unreachable.
-    // }
+    Node *node;
+    switch (kind) {
+        case TOK_ADD: {
+            node = node_alloc(NODE_ADD, parser->previous, parser->storage);
+        } break;
+        case TOK_SUB: {
+            node = node_alloc(NODE_SUB, parser->previous, parser->storage);
+        } break;
+        case TOK_MUL: {
+            node = node_alloc(NODE_MUL, parser->previous, parser->storage);
+        } break;
+        case TOK_DIV: {
+            node = node_alloc(NODE_DIV, parser->previous, parser->storage);
+        } break;
+        default: {
+            parse_emit_err(parser, parser->previous, cstr("unreachable"));
+            return;
+        }
+    }
+    node->right = array_pop(parser->nodes);
+    node->left = array_pop(parser->nodes);
+    array_push(parser->nodes, node, parser->storage);
 }
 
 void
 parse_number(Parser *parser) {
+#if DEBUG == 1
     print("parsing number ");
     print_token(parser->previous);
-    // TODO: ...
-    // double value = strtod(parser.previous.start, NULL);
-    // emitConstant(value);
+#endif
+    Node *node = node_alloc(NODE_NUMBER, parser->previous, parser->storage);
+    node->value.i = str_to_int(parser->previous.val);
+    // TODO: handle sign and/or floating point values.
+    array_push(parser->nodes, node, parser->storage);
 }
 
 void
 parse_grouping(Parser *parser) {
+#if DEBUG == 1
     print("parsing group ");
     print_token(parser->previous);
+#endif
     parse_expr(parser);
     parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression"));
 }
 
 void
 parse_expr(Parser *parser) {
-    // TODO: ...
     // Can this be prec_none instead?
     parse_prec(parser, PREC_ASSIGNMENT);
 }
 
 void
+graph_node(Node *node) {
+    if (node == NULL) {
+        return;
+    }
+    print("%d [width=2.5,shape=Mrecord,label=\"", node->id);
+    print("<top> %s ", node_str[node->kind]);
+    switch (node->kind) {
+        case NODE_NUMBER: {
+            print("| Value: %d", node->value.i);
+        } break;
+        default:
+            break;
+    }
+    print("| Line: %d | Col: %d ", node->line, node->col);
+    println("\"];");
+    if (node->left) {
+        println("%d:top:e->%d:top:w;", node->id, node->left->id);
+        graph_node(node->left);
+    }
+    if (node->right) {
+        println("%d:top:e->%d:top:w;", node->id, node->right->id);
+        graph_node(node->right);
+    }
+}
+
+void
+graph_ast(Node **roots) {
+    if (roots == NULL) {
+        return;
+    }
+    println("digraph ast {");
+    println("rankdir=LR;");
+    println("ranksep=\"0.95 equally\";");
+    println("nodesep=\"0.5 equally\";");
+    println("overlap=scale;");
+    println("bgcolor=\"transparent\";");
+    for (sz i = 0; i < array_size(roots); ++i) {
+        println("subgraph %d {", i);
+        Node *root = roots[array_size(roots) - 1 - i];
+        graph_node(root);
+        println("}");
+    }
+    println("}");
+}
+
+void
 process_file(Str path) {
     Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
 
@@ -208,11 +377,11 @@ process_file(Str path) {
     Scanner scanner = {.str = file.data};
     Token *tokens = NULL;
     Token tok = {0};
-    while (tok.type != TOK_EOF) {
+    while (tok.kind != TOK_EOF) {
         tok = scan_token(&scanner);
-        if (tok.type == TOK_UNKNOWN) {
+        if (tok.kind == TOK_UNKNOWN) {
             eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col,
-                     token_str[tok.type], tok.val);
+                     token_str[tok.kind], tok.val);
             errors++;
             continue;
         }
@@ -225,11 +394,31 @@ process_file(Str path) {
     }
 
     // Parser.
-    Parser parser = {.tokens = tokens};
+    Parser parser = {
+        .tokens = tokens,
+        .storage = &lexer_arena,
+    };
+    array_init(parser.nodes, 256, parser.storage);
     parse_advance(&parser);
-    parse_expr(&parser);
-    parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
+    while (parser.current.kind != TOK_EOF) {
+        parse_expr(&parser);
+    }
+    if (parser.err) {
+        goto stop;
+    }
+    if (mode == PRINT_PARSE) {
+        graph_ast(parser.nodes);
+        goto stop;
+    }
 
+    sz n_roots = array_size(parser.nodes);
+    for (sz i = 0; i < n_roots; i++) {
+        // The parser stores the root nodes as a stack.
+        Node *root = parser.nodes[i];
+        print_node(root); println("");
+        (void)root;
+    }
+    parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
     // print_tokens(path, tokens); // DEBUG
 
 stop:
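
With -pp, process_file now hands the root stack to graph_ast, which emits one DOT subgraph per root and connects each node's <top> port to its children; the graph-parse target pipes that output through $(DOT). For a lone literal 5 the emitted record would look roughly like this (per the format strings in graph_node, with ids taken from the static counter in node_alloc):

    0 [width=2.5,shape=Mrecord,label="<top> NUM | Value: 5| Line: 1 | Col: 1 "];
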
diff --git a/tests/expressions.bad b/tests/expressions.bad
index c992a7a..af60b9b 100644
--- a/tests/expressions.bad
+++ b/tests/expressions.bad
@@ -1,4 +1,7 @@
-1 + 2 * 5 - 2
-; 1 + 2 * (5 - 2)
-; (1 + 2 * (5 - 2))
-; (1 + 2 * 5 - 2)
+1 + 2
+
+1 + 2 * 3
+
+1 + 2 * 3 - 4
+
+1 + 2 * (3 - 4)
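
When -pp is not given, each root now goes through print_node, so the four test expressions should print as s-expressions that make the precedence explicit, roughly (assuming the usual Pratt setup where * and / at PREC_FACTOR bind tighter than + and -):

    (+ 1 2)
    (+ 1 (* 2 3))
    (- (+ 1 (* 2 3)) 4)
    (+ 1 (* 2 (- 3 4)))
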