From 2e7f414c65d89ebe52570b0b0fb9b7ff2585bf96 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sun, 16 Jun 2024 15:00:19 +0200 Subject: Add graphviz visualization for the parse tree --- Makefile | 29 ++++-- src/badlib.h | 3 + src/lexer.c | 16 ++-- src/main.c | 249 ++++++++++++++++++++++++++++++++++++++++++++------ tests/expressions.bad | 11 ++- 5 files changed, 258 insertions(+), 50 deletions(-) diff --git a/Makefile b/Makefile index 987ecd8..35447d3 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,17 @@ SRC_DIR := src BUILD_DIR := build SRC_MAIN := $(SRC_DIR)/main.c +SRC_BAD := tests/expressions.bad WATCH_SRC := $(shell find $(SRC_DIR) -name "*.c" -or -name "*.s" -or -name "*.h") INC_DIRS := $(shell find $(SRC_DIR) -type d) INC_FLAGS := $(addprefix -I,$(INC_DIRS)) +# Graphviz viz command. +DOT := dot -Gmargin=0.7 -Gcolor=white -Gfontcolor=white \ + -Ncolor=white -Nfontcolor=white -Ecolor=white \ + -Nfontname="Iosevka Nerd Font" -Nfontsize=15 \ + -T png | kitty +kitten icat + # Output executable. TARGET := bdl BIN := $(BUILD_DIR)/$(TARGET) @@ -44,19 +51,23 @@ $(BUILD_DIR): mkdir -p $(BUILD_DIR) run: $(BIN) - $(BIN) tests/expressions.bad + $(BIN) $(SRC_BAD) -viz_lex: $(BIN) - $(BIN) -pl example.bdl +graph-tokens: $(BIN) + # TODO: ... + $(BIN) -pl $(SRC_BAD) -viz_par: $(BIN) - $(BIN) -pp example.bdl | idot +graph-parse: $(BIN) + @echo "parsing tree for: '$(SRC_BAD)'" + @$(BIN) -pp $(SRC_BAD) | $(DOT) -viz_sem: $(BIN) - $(BIN) -ps example.bdl | idot +graph-semantic: $(BIN) + @echo "semantic tree for: '$(SRC_BAD)'" + @$(BIN) -ps $(SRC_BAD) | $(DOT) -viz_sym: $(BIN) - $(BIN) -pt example.bdl | idot +graph-symbols: $(BIN) + @echo "symbol table for: '$(SRC_BAD)'" + @$(BIN) -pt $(SRC_BAD) | $(DOT) # Remove build directory. clean: diff --git a/src/badlib.h b/src/badlib.h index 71f4bee..4944d2e 100644 --- a/src/badlib.h +++ b/src/badlib.h @@ -832,6 +832,9 @@ _array_maybe_grow(void *arr, sz type_size, Arena *a) { static inline char * _array_insert(char *arr, const char *src, sz n_bytes, sz type_size, Arena *a) { + if (!arr) { + arr = _array_reserve(0, 0, a); + } ArrayHeader *head = array_head(arr); sz new_size = n_bytes + head->size; if (new_size > head->cap * type_size) { diff --git a/src/lexer.c b/src/lexer.c index df998f2..6a5621c 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,6 +1,8 @@ +#include "badlib.h" + #define LEXER_MEM GB(2) -typedef enum TokenType { +typedef enum TokenKind { TOK_UNKNOWN = 0, // Parentheses. @@ -65,7 +67,7 @@ typedef enum TokenType { // End of file. TOK_EOF, -} TokenType; +} TokenKind; Str token_str[] = { [TOK_UNKNOWN] = cstr("UNKNOWN"), @@ -135,7 +137,7 @@ Str token_str[] = { }; typedef struct Token { - TokenType type; + TokenKind kind; Str val; sz line; sz col; @@ -256,7 +258,7 @@ scan_skip_until_valid(Scanner *scanner) { } Token -emit_token(Scanner current, Scanner *scanner, TokenType t) { +emit_token(Scanner current, Scanner *scanner, TokenKind t) { Str val = current.str; val.size = current.str.size - scanner->str.size; val.size = val.size < 0 ? 0 : val.size; @@ -264,7 +266,7 @@ emit_token(Scanner current, Scanner *scanner, TokenType t) { .val = val, .line = current.line + 1, .col = current.col + 1, - .type = t, + .kind = t, }; } @@ -274,7 +276,7 @@ emit_token_err(Scanner *scanner, Str err_msg) { .line = scanner->line + 1, .col = scanner->col + 1, .val = err_msg, - .type = TOK_UNKNOWN, + .kind = TOK_UNKNOWN, }; } @@ -430,7 +432,7 @@ scan_token(Scanner *scanner) { *scanner = current; return emit_token_number(scanner); } - return emit_token(current, scanner, TOK_ADD); + return emit_token(current, scanner, TOK_SUB); }; case '*': return emit_token(current, scanner, TOK_MUL); diff --git a/src/main.c b/src/main.c index 44a7bf1..cea7a45 100644 --- a/src/main.c +++ b/src/main.c @@ -22,7 +22,7 @@ init(void) { void print_token(Token tok) { - println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.type], tok.val); + println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); } void @@ -34,13 +34,113 @@ print_tokens(Str path, Token *tokens) { } } +// +// Nodes +// + +typedef enum NodeKind { + NODE_NUMBER, + // TODO: probably want to handle ints/unsigneds/floats separately. + NODE_ADD, + NODE_SUB, + NODE_DIV, + NODE_MUL, + // TODO: MOD +} NodeKind; + +Str node_str[] = { + [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"), + [NODE_SUB] = cstr("SUB"), [NODE_DIV] = cstr("DIV"), + [NODE_MUL] = cstr("MUL"), +}; + +typedef struct Node { + sz id; + sz line; + sz col; + + NodeKind kind; + union { + f64 d; + f32 f; + sz i; + u64 u; + } value; + struct Node *left; + struct Node *right; +} Node; + +Node * +node_alloc(NodeKind kind, Token tok, Arena *a) { + static sz id = 0; + Node *node = arena_calloc((sz)sizeof(Node), a); + node->id = id++; + node->kind = kind; + node->line = tok.line; + node->col = tok.col; + return node; +} + +void +print_node(Node *node) { + if (!node) { + return; + } + switch (node->kind) { + case NODE_NUMBER: { + print("%d", node->value.i); + } break; + case NODE_ADD: { + print("(+ "); + print_node(node->left); + print(" "); + print_node(node->right); + print(")"); + } break; + case NODE_SUB: { + print("(- "); + print_node(node->left); + print(" "); + print_node(node->right); + print(")"); + } break; + case NODE_DIV: { + print("(/ "); + print_node(node->left); + print(" "); + print_node(node->right); + print(")"); + } break; + case NODE_MUL: { + print("(* "); + print_node(node->left); + print(" "); + print_node(node->right); + print(")"); + } break; + default: { + eprintln("error: unknown node kind: %d", node->kind); + } break; + } +} + +// +// Parsing. +// + typedef struct Parser { Token current; Token previous; Token *tokens; sz idx; + + // Error handling. bool err; bool panic; + + // Storage. + Node **nodes; + Arena *storage; } Parser; typedef enum { @@ -80,6 +180,7 @@ ParseRule parse_rules[] = { [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, [TOK_NUMBER] = {parse_number, NULL, PREC_NONE}, + [TOK_EOF] = {NULL, NULL, PREC_NONE}, }; void @@ -91,9 +192,9 @@ parse_emit_err(Parser *parser, Token token, Str msg) { parser->err = true; eprint("%d:%d: error: %s", token.line, token.col, msg); - if (token.type == TOK_EOF) { + if (token.kind == TOK_EOF) { eprintln(" -> at end of the file"); - } else if (token.type != TOK_UNKNOWN) { + } else if (token.kind != TOK_UNKNOWN) { eprintln(" -> at %s", token.val); } } @@ -106,8 +207,8 @@ parse_advance(Parser *parser) { } static void -parse_consume(Parser *parser, TokenType type, Str msg) { - if (parser->current.type == type) { +parse_consume(Parser *parser, TokenKind kind, Str msg) { + if (parser->current.kind == kind) { parse_advance(parser); return; } @@ -117,16 +218,16 @@ parse_consume(Parser *parser, TokenType type, Str msg) { static void parse_prec(Parser *parser, ParsePrecedence precedence) { parse_advance(parser); - ParseFn prefix = parse_rules[parser->previous.type].prefix; + ParseFn prefix = parse_rules[parser->previous.kind].prefix; if (prefix == NULL) { parse_emit_err(parser, parser->previous, cstr("expected expression")); return; } prefix(parser); - while (precedence <= parse_rules[parser->current.type].precedence) { + while (precedence <= parse_rules[parser->current.kind].precedence) { parse_advance(parser); - ParseFn infix = parse_rules[parser->previous.type].infix; + ParseFn infix = parse_rules[parser->previous.kind].infix; if (infix == NULL) { parse_emit_err(parser, parser->previous, cstr("expected expression")); @@ -138,13 +239,15 @@ parse_prec(Parser *parser, ParsePrecedence precedence) { void parse_unary(Parser *parser) { +#if DEBUG == 1 print("parsing unary "); print_token(parser->previous); +#endif parse_prec(parser, PREC_ASSIGNMENT); - TokenType type = parser->previous.type; + TokenKind kind = parser->previous.kind; parse_expr(parser); // TODO: ... - switch (type) { + switch (kind) { // case TOKEN_MINUS: emitByte(OP_NEGATE); break; default: return; // Unreachable. @@ -153,46 +256,112 @@ parse_unary(Parser *parser) { void parse_binary(Parser *parser) { +#if DEBUG == 1 print("parsing binary "); print_token(parser->previous); - TokenType operatorType = parser->previous.type; - ParseRule rule = parse_rules[operatorType]; +#endif + TokenKind kind = parser->previous.kind; + ParseRule rule = parse_rules[kind]; parse_prec(parser, rule.precedence + 1); - // TODO: ... - // switch (operatorType) { - // case TOKEN_PLUS: emitByte(OP_ADD); break; - // case TOKEN_MINUS: emitByte(OP_SUBTRACT); break; - // case TOKEN_STAR: emitByte(OP_MULTIPLY); break; - // case TOKEN_SLASH: emitByte(OP_DIVIDE); break; - // default: return; // Unreachable. - // } + Node *node; + switch (kind) { + case TOK_ADD: { + node = node_alloc(NODE_ADD, parser->previous, parser->storage); + } break; + case TOK_SUB: { + node = node_alloc(NODE_SUB, parser->previous, parser->storage); + } break; + case TOK_MUL: { + node = node_alloc(NODE_MUL, parser->previous, parser->storage); + } break; + case TOK_DIV: { + node = node_alloc(NODE_DIV, parser->previous, parser->storage); + } break; + default: { + parse_emit_err(parser, parser->previous, cstr("unreachable")); + return; + } + } + node->right = array_pop(parser->nodes); + node->left = array_pop(parser->nodes); + array_push(parser->nodes, node, parser->storage); } void parse_number(Parser *parser) { +#if DEBUG == 1 print("parsing number "); print_token(parser->previous); - // TODO: ... - // double value = strtod(parser.previous.start, NULL); - // emitConstant(value); +#endif + Node *node = node_alloc(NODE_NUMBER, parser->previous, parser->storage); + node->value.i = str_to_int(parser->previous.val); + // TODO: handle sign and/or floating point values. + array_push(parser->nodes, node, parser->storage); } void parse_grouping(Parser *parser) { +#if DEBUG == 1 print("parsing group "); print_token(parser->previous); +#endif parse_expr(parser); parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); } void parse_expr(Parser *parser) { - // TODO: ... // Can this be prec_none instead? parse_prec(parser, PREC_ASSIGNMENT); } +void +graph_node(Node *node) { + if (node == NULL) { + return; + } + print("%d [width=2.5,shape=Mrecord,label=\"", node->id); + print(" %s ", node_str[node->kind]); + switch (node->kind) { + case NODE_NUMBER: { + print("| Value: %d", node->value.i); + } break; + default: + break; + } + print("| Line: %d | Col: %d ", node->line, node->col); + println("\"];"); + if (node->left) { + println("%d:top:e->%d:top:w;", node->id, node->left->id); + graph_node(node->left); + } + if (node->right) { + println("%d:top:e->%d:top:w;", node->id, node->right->id); + graph_node(node->right); + } +} + +void +graph_ast(Node **roots) { + if (roots == NULL) { + return; + } + println("digraph ast {"); + println("rankdir=LR;"); + println("ranksep=\"0.95 equally\";"); + println("nodesep=\"0.5 equally\";"); + println("overlap=scale;"); + println("bgcolor=\"transparent\";"); + for (sz i = 0; i < array_size(roots); ++i) { + println("subgraph %d {", i); + Node *root = roots[array_size(roots) - 1 - i]; + graph_node(root); + println("}"); + } + println("}"); +} + void process_file(Str path) { Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); @@ -208,11 +377,11 @@ process_file(Str path) { Scanner scanner = {.str = file.data}; Token *tokens = NULL; Token tok = {0}; - while (tok.type != TOK_EOF) { + while (tok.kind != TOK_EOF) { tok = scan_token(&scanner); - if (tok.type == TOK_UNKNOWN) { + if (tok.kind == TOK_UNKNOWN) { eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, - token_str[tok.type], tok.val); + token_str[tok.kind], tok.val); errors++; continue; } @@ -225,11 +394,31 @@ process_file(Str path) { } // Parser. - Parser parser = {.tokens = tokens}; + Parser parser = { + .tokens = tokens, + .storage = &lexer_arena, + }; + array_init(parser.nodes, 256, parser.storage); parse_advance(&parser); - parse_expr(&parser); - parse_consume(&parser, TOK_EOF, cstr("expected end of file")); + while (parser.current.kind != TOK_EOF) { + parse_expr(&parser); + } + if (parser.err) { + goto stop; + } + if (mode == PRINT_PARSE) { + graph_ast(parser.nodes); + goto stop; + } + sz n_roots = array_size(parser.nodes); + for (sz i = 0; i < n_roots; i++) { + // The parser stores the root nodes as a stack. + Node *root = parser.nodes[i]; + print_node(root); println(""); + (void)root; + } + parse_consume(&parser, TOK_EOF, cstr("expected end of file")); // print_tokens(path, tokens); // DEBUG stop: diff --git a/tests/expressions.bad b/tests/expressions.bad index c992a7a..af60b9b 100644 --- a/tests/expressions.bad +++ b/tests/expressions.bad @@ -1,4 +1,7 @@ -1 + 2 * 5 - 2 -; 1 + 2 * (5 - 2) -; (1 + 2 * (5 - 2)) -; (1 + 2 * 5 - 2) +1 + 2 + +1 + 2 * 3 + +1 + 2 * 3 - 4 + +1 + 2 * (3 - 4) -- cgit v1.2.1