#include #include #include #include "badlib.h" #include "lexer.c" typedef enum ExecMode { RUN_NORMAL, PRINT_LEX, PRINT_PARSE, PRINT_SEMANTIC, PRINT_SYMTABLES, } ExecMode; static ExecMode mode = RUN_NORMAL; void init(void) { log_init_default(); } void print_token(Token tok) { println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.kind], tok.val); } void print_tokens(Str path, Token *tokens) { for (sz i = 0; i < array_size(tokens); i++) { Token tok = tokens[i]; print("%s:", path); print_token(tok); } } // // Nodes // typedef enum NodeKind { NODE_NUM_INT, NODE_NUM_FLOAT, // TODO: probably want to handle ints/unsigneds/floats separately. NODE_ADD, NODE_SUB, NODE_DIV, NODE_MUL, NODE_MOD, } NodeKind; Str node_str[] = { // Arithmetic. [NODE_ADD] = cstr("ADD"), [NODE_SUB] = cstr("SUB"), [NODE_DIV] = cstr("DIV"), [NODE_MUL] = cstr("MUL"), [NODE_MOD] = cstr("MOD"), // Literals. [NODE_NUM_INT] = cstr("INT"), [NODE_NUM_FLOAT] = cstr("FLOAT"), }; typedef struct Node { sz id; sz line; sz col; NodeKind kind; union { f64 d; sz i; } value; struct Node *left; struct Node *right; } Node; Node * node_alloc(NodeKind kind, Token tok, Arena *a) { static sz id = 0; Node *node = arena_calloc((sz)sizeof(Node), a); node->id = id++; node->kind = kind; node->line = tok.line; node->col = tok.col; return node; } // // Parsing. // typedef struct Parser { Token current; Token previous; Token *tokens; sz idx; // Error handling. bool err; bool panic; // Storage. Node **nodes; Arena *storage; } Parser; typedef enum { PREC_NONE = 0, PREC_LOW, // lowest precedence PREC_OR, // || PREC_AND, // && PREC_EQUALITY, // == != PREC_COMPARISON, // < > <= >= PREC_TERM, // + - PREC_FACTOR, // * / % PREC_UNARY, // ! - PREC_CALL, // . () PREC_PRIMARY // highest precedence } ParsePrecedence; typedef void (*ParseFn)(Parser *); typedef struct { ParseFn prefix; ParseFn infix; ParsePrecedence precedence; } ParseRule; void parse_expr(Parser *parser, ParsePrecedence precedence); void parse_advance(Parser *parser); void parse_grouping(Parser *parser); void parse_unary(Parser *parser); void parse_binary(Parser *parser); void parse_number(Parser *parser); ParseRule parse_rules[] = { [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, [TOK_EOF] = {NULL, NULL, PREC_NONE}, }; void parse_emit_err(Parser *parser, Token token, Str msg) { if (parser->panic) { return; } parser->panic = true; parser->err = true; eprint("%d:%d: error: %s", token.line, token.col, msg); if (token.kind == TOK_EOF) { eprintln(" -> at end of the file"); } else if (token.kind != TOK_UNKNOWN) { eprintln(" -> at %s", token.val); } } void parse_advance(Parser *parser) { assert(parser); parser->previous = parser->current; parser->current = parser->tokens[parser->idx++]; } static void parse_consume(Parser *parser, TokenKind kind, Str msg) { if (parser->current.kind == kind) { parse_advance(parser); return; } parse_emit_err(parser, parser->current, msg); } void parse_expr(Parser *parser, ParsePrecedence precedence) { parse_advance(parser); ParseFn prefix = parse_rules[parser->previous.kind].prefix; if (prefix == NULL) { parse_emit_err(parser, parser->previous, cstr("expected expression")); return; } prefix(parser); while (precedence <= parse_rules[parser->current.kind].precedence) { parse_advance(parser); ParseFn infix = parse_rules[parser->previous.kind].infix; if (infix == NULL) { parse_emit_err(parser, parser->previous, cstr("expected expression")); return; } infix(parser); } } void parse_unary(Parser *parser) { Token prev = parser->previous; #if DEBUG == 1 print("parsing unary "); print_token(prev); #endif TokenKind kind = prev.kind; parse_expr(parser, PREC_LOW); // TODO: ... switch (kind) { // case TOKEN_MINUS: emitByte(OP_NEGATE); break; default: return; // Unreachable. } } void parse_binary(Parser *parser) { Token prev = parser->previous; #if DEBUG == 1 print("parsing binary "); print_token(prev); #endif TokenKind kind = prev.kind; ParseRule rule = parse_rules[kind]; parse_expr(parser, rule.precedence + 1); Node *node; switch (kind) { case TOK_ADD: node = node_alloc(NODE_ADD, prev, parser->storage); break; case TOK_SUB: node = node_alloc(NODE_SUB, prev, parser->storage); break; case TOK_MUL: node = node_alloc(NODE_MUL, prev, parser->storage); break; case TOK_DIV: node = node_alloc(NODE_DIV, prev, parser->storage); break; case TOK_MOD: node = node_alloc(NODE_MOD, prev, parser->storage); break; default: { parse_emit_err(parser, prev, cstr("unreachable")); return; } } node->right = array_pop(parser->nodes); node->left = array_pop(parser->nodes); array_push(parser->nodes, node, parser->storage); } void parse_number(Parser *parser) { Token prev = parser->previous; #if DEBUG == 1 print("parsing number "); print_token(prev); #endif Node *node = NULL; switch (prev.kind) { case TOK_NUM_INT: { node = node_alloc(NODE_NUM_INT, prev, parser->storage); node->value.i = str_to_int(prev.val); } break; case TOK_NUM_FLOAT: { node = node_alloc(NODE_NUM_FLOAT, prev, parser->storage); node->value.d = str_to_float(prev.val); } break; default: break; } array_push(parser->nodes, node, parser->storage); } void parse_grouping(Parser *parser) { #if DEBUG == 1 print("parsing group "); print_token(parser->previous); #endif parse_expr(parser, PREC_LOW); parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression")); } void graph_node(Node *node) { if (node == NULL) { return; } print("%d [width=2.5,shape=Mrecord,label=\"", node->id); print(" %s ", node_str[node->kind]); switch (node->kind) { case NODE_NUM_INT: print("| Value: %d", node->value.i); break; case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; default: break; } print("| Line: %d | Col: %d ", node->line, node->col); println("\"];"); if (node->left) { println("%d:top:e->%d:top:w;", node->id, node->left->id); graph_node(node->left); } if (node->right) { println("%d:top:e->%d:top:w;", node->id, node->right->id); graph_node(node->right); } } void graph_ast(Node **roots) { if (roots == NULL) { return; } println("digraph ast {"); println("rankdir=LR;"); println("ranksep=\"0.95 equally\";"); println("nodesep=\"0.5 equally\";"); println("overlap=scale;"); println("bgcolor=\"transparent\";"); for (sz i = 0; i < array_size(roots); ++i) { println("subgraph %d {", i); Node *root = roots[array_size(roots) - 1 - i]; graph_node(root); println("}"); } println("}"); } void process_file(Str path) { Arena lexer_arena = arena_create(LEXER_MEM, os_allocator); FileContents file = platform_read_file(path, &lexer_arena); if (file.err) { eprintln("%s: error: %s", path, cstr("WOT")); return; } sz errors = 0; // Lexer. Scanner scanner = {.str = file.data}; Token *tokens = NULL; Token tok = {0}; while (tok.kind != TOK_EOF) { tok = scan_token(&scanner); if (tok.kind == TOK_UNKNOWN) { eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col, token_str[tok.kind], tok.val); errors++; continue; } array_push(tokens, tok, &lexer_arena); } // Only proceed if there are no errors. if (errors) { goto stop; } // Parser. Parser parser = { .tokens = tokens, .storage = &lexer_arena, }; array_init(parser.nodes, 256, parser.storage); parse_advance(&parser); while (parser.current.kind != TOK_EOF) { parse_expr(&parser, PREC_LOW); } if (parser.err) { goto stop; } if (mode == PRINT_PARSE) { graph_ast(parser.nodes); goto stop; } sz n_roots = array_size(parser.nodes); for (sz i = 0; i < n_roots; i++) { // The parser stores the root nodes as a stack. Node *root = parser.nodes[i]; (void)root; } parse_consume(&parser, TOK_EOF, cstr("expected end of file")); // print_tokens(path, tokens); // DEBUG stop: // Free up resources. arena_destroy(&lexer_arena, os_allocator); } #ifndef BIN_NAME #define BIN_NAME "bdl" #endif void print_usage(void) { printf("Usage: %s [options] \n", BIN_NAME); printf("\n"); printf("\t-h \t\tShow usage.\n"); printf( "\t-p [l|p|s|t]\tPrint mode for [l]exing, [p]arsing, [s]emantic " "analysis, symbol [t]ables\n"); printf("\n"); } int main(int argc, char *argv[]) { int option; while ((option = getopt(argc, argv, "hp:")) != -1) { switch (option) { case 'h': { print_usage(); goto exit_success; } break; case 'p': { if (optarg[0] == 'l' && optarg[1] == '\0') { mode = PRINT_LEX; } else if (optarg[0] == 'p' && optarg[1] == '\0') { mode = PRINT_PARSE; } else if (optarg[0] == 's' && optarg[1] == '\0') { mode = PRINT_SEMANTIC; } else if (optarg[0] == 't' && optarg[1] == '\0') { mode = PRINT_SYMTABLES; } else { print_usage(); return EXIT_FAILURE; } } break; default: { print_usage(); return EXIT_FAILURE; } break; } } init(); // Run from stdin. if (optind == argc) { // TODO: REPL // repl(); goto exit_success; } // Run from file. while (optind < argc) { char *file_name = argv[optind]; Str file_path = STR(file_name); process_file(file_path); optind++; } exit_success: return EXIT_SUCCESS; }