From c2c8796511c90930c41700f5fcd2043a5c4405c9 Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Sun, 16 Jun 2024 11:20:28 +0200
Subject: Setup initial Pratt parser

---
 Makefile              |   2 +-
 src/badlib.h          |   4 ++
 src/main.c            | 192 ++++++++++++++++++++++++++++++++++++++++++++++--
 tests/expressions.bad |   4 ++
 4 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 tests/expressions.bad

diff --git a/Makefile b/Makefile
index 4e66983..987ecd8 100644
--- a/Makefile
+++ b/Makefile
@@ -44,7 +44,7 @@ $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
 
 run: $(BIN)
-	$(BIN) tests/literals.bad
+	$(BIN) tests/expressions.bad
 
 viz_lex: $(BIN)
 	$(BIN) -pl example.bdl
diff --git a/src/badlib.h b/src/badlib.h
index a91fcf8..71f4bee 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -11,6 +11,7 @@
 // - Implement binary search for searching into an array:
 //       SearchResult find_array(Array haystack, Array needle).
 // - Logger functions for hash map and queues.
+// - Make assert/abort macros dump the file name/line?
 //
 
 #include
@@ -811,6 +812,9 @@ _array_reserve(sz num_elem, sz type_size, Arena *a) {
 
 static inline void *
 _array_maybe_grow(void *arr, sz type_size, Arena *a) {
+    if (!arr) {
+        arr = _array_reserve(0, 0, a);
+    }
     ArrayHeader *head = array_head(arr);
     if (head->cap == head->size) {
         sz prev_size = head->cap * type_size + sizeof(ArrayHeader);
diff --git a/src/main.c b/src/main.c
index 9848b8b..44a7bf1 100644
--- a/src/main.c
+++ b/src/main.c
@@ -20,29 +20,203 @@ init(void) {
     log_init_default();
 }
 
+void
+print_token(Token tok) {
+    println("%d:%d\t%s %s", tok.line, tok.col, token_str[tok.type], tok.val);
+}
+
+void
+print_tokens(Str path, Token *tokens) {
+    for (sz i = 0; i < array_size(tokens); i++) {
+        Token tok = tokens[i];
+        print("%s:", path);
+        print_token(tok);
+    }
+}
+
+typedef struct Parser {
+    Token current;
+    Token previous;
+    Token *tokens;
+    sz idx;
+    bool err;
+    bool panic;
+} Parser;
+
+typedef enum {
+    PREC_NONE = 0,
+    PREC_ASSIGNMENT,  // =   FIXME: this may not happen in our lang?
+    PREC_OR,          // ||
+    PREC_AND,         // &&
+    PREC_EQUALITY,    // == !=
+    PREC_COMPARISON,  // < > <= >=
+    PREC_TERM,        // + -
+    PREC_FACTOR,      // * /
+    PREC_UNARY,       // ! -
+    PREC_CALL,        // . ()
+    PREC_PRIMARY
+} ParsePrecedence;
+
+typedef void (*ParseFn)(Parser *);
+
+typedef struct {
+    ParseFn prefix;
+    ParseFn infix;
+    ParsePrecedence precedence;
+} ParseRule;
+
+void parse_expr(Parser *parser);
+void parse_advance(Parser *parser);
+
+void parse_grouping(Parser *parser);
+void parse_unary(Parser *parser);
+void parse_binary(Parser *parser);
+void parse_number(Parser *parser);
+
+ParseRule parse_rules[] = {
+    [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE},
+    [TOK_SUB]    = {parse_unary, parse_binary, PREC_TERM},
+    [TOK_ADD]    = {NULL, parse_binary, PREC_TERM},
+    [TOK_DIV]    = {NULL, parse_binary, PREC_FACTOR},
+    [TOK_MUL]    = {NULL, parse_binary, PREC_FACTOR},
+    [TOK_NUMBER] = {parse_number, NULL, PREC_NONE},
+};
+
+void
+parse_emit_err(Parser *parser, Token token, Str msg) {
+    if (parser->panic) {
+        return;
+    }
+    parser->panic = true;
+    parser->err = true;
+    eprint("%d:%d: error: %s", token.line, token.col, msg);
+
+    if (token.type == TOK_EOF) {
+        eprintln(" -> at end of the file");
+    } else if (token.type != TOK_UNKNOWN) {
+        eprintln(" -> at %s", token.val);
+    }
+}
+
+void
+parse_advance(Parser *parser) {
+    assert(parser);
+    parser->previous = parser->current;
+    parser->current = parser->tokens[parser->idx++];
+}
+
+static void
+parse_consume(Parser *parser, TokenType type, Str msg) {
+    if (parser->current.type == type) {
+        parse_advance(parser);
+        return;
+    }
+    parse_emit_err(parser, parser->current, msg);
+}
+
+static void
+parse_prec(Parser *parser, ParsePrecedence precedence) {
+    parse_advance(parser);
+    ParseFn prefix = parse_rules[parser->previous.type].prefix;
+    if (prefix == NULL) {
+        parse_emit_err(parser, parser->previous, cstr("expected expression"));
+        return;
+    }
+    prefix(parser);
+
+    while (precedence <= parse_rules[parser->current.type].precedence) {
+        parse_advance(parser);
+        ParseFn infix = parse_rules[parser->previous.type].infix;
+        if (infix == NULL) {
+            parse_emit_err(parser, parser->previous,
+                    cstr("expected expression"));
+            return;
+        }
+        infix(parser);
+    }
+}
+
+void
+parse_unary(Parser *parser) {
+    print("parsing unary ");
+    print_token(parser->previous);
+    parse_prec(parser, PREC_ASSIGNMENT);
+    TokenType type = parser->previous.type;
+    parse_expr(parser);
+    // TODO: ...
+    switch (type) {
+        // case TOKEN_MINUS: emitByte(OP_NEGATE); break;
+        default:
+            return; // Unreachable.
+    }
+}
+
+void
+parse_binary(Parser *parser) {
+    print("parsing binary ");
+    print_token(parser->previous);
+    TokenType operatorType = parser->previous.type;
+    ParseRule rule = parse_rules[operatorType];
+    parse_prec(parser, rule.precedence + 1);
+
+    // TODO: ...
+    // switch (operatorType) {
+    //     case TOKEN_PLUS: emitByte(OP_ADD); break;
+    //     case TOKEN_MINUS: emitByte(OP_SUBTRACT); break;
+    //     case TOKEN_STAR: emitByte(OP_MULTIPLY); break;
+    //     case TOKEN_SLASH: emitByte(OP_DIVIDE); break;
+    //     default: return; // Unreachable.
+    // }
+}
+
+void
+parse_number(Parser *parser) {
+    print("parsing number ");
+    print_token(parser->previous);
+    // TODO: ...
+    // double value = strtod(parser.previous.start, NULL);
+    // emitConstant(value);
+}
+
+void
+parse_grouping(Parser *parser) {
+    print("parsing group ");
+    print_token(parser->previous);
+    parse_expr(parser);
+    parse_consume(parser, TOK_RPAREN, cstr("expected ')' after expression"));
+}
+
+void
+parse_expr(Parser *parser) {
+    // TODO: ...
+    // Can this be prec_none instead?
+    parse_prec(parser, PREC_ASSIGNMENT);
+}
+
 void
 process_file(Str path) {
     Arena lexer_arena = arena_create(LEXER_MEM, os_allocator);
     FileContents file = platform_read_file(path, &lexer_arena);
     if (file.err) {
-        printf("file.err: %d\n", file.err);
         eprintln("%s: error: %s", path, cstr("WOT"));
         return;
     }
+    sz errors = 0;
 
-    Scanner scanner = {
-        .str = file.data,
-    };
+    // Lexer.
+    Scanner scanner = {.str = file.data};
+    Token *tokens = NULL;
     Token tok = {0};
-    sz errors = 0;
     while (tok.type != TOK_EOF) {
         tok = scan_token(&scanner);
         if (tok.type == TOK_UNKNOWN) {
             eprintln("%s:%d:%d:%s %s", path, tok.line, tok.col,
                     token_str[tok.type], tok.val);
             errors++;
+            continue;
         }
+        array_push(tokens, tok, &lexer_arena);
     }
 
     // Only proceed if there are no errors.
@@ -50,6 +224,14 @@ process_file(Str path) {
         goto stop;
     }
 
+    // Parser.
+    Parser parser = {.tokens = tokens};
+    parse_advance(&parser);
+    parse_expr(&parser);
+    parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
+
+    // print_tokens(path, tokens); // DEBUG
+
 stop:
     // Free up resources.
     arena_destroy(&lexer_arena, os_allocator);
diff --git a/tests/expressions.bad b/tests/expressions.bad
new file mode 100644
index 0000000..c992a7a
--- /dev/null
+++ b/tests/expressions.bad
@@ -0,0 +1,4 @@
+1 + 2 * 5 - 2
+; 1 + 2 * (5 - 2)
+; (1 + 2 * (5 - 2))
+; (1 + 2 * 5 - 2)
-- 
cgit v1.2.1
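
A note on the shape of the new parser, not part of the commit itself: parse_prec() is the core of the
Pratt technique. It consumes one token, runs the prefix rule registered for it in parse_rules[], and
then, while the next token's infix rule binds at least as tightly as the precedence it was asked for,
consumes that operator and runs its infix rule; parse_binary() recurses with rule.precedence + 1 so
operators of equal precedence associate to the left. The sketch below restates that loop as a tiny
standalone evaluator for the operators the patch registers (+ - * / and grouping). All names in it are
invented for illustration and it evaluates instead of emitting code, so none of it is project API;
only the control flow mirrors parse_prec().

    /* Minimal Pratt-loop sketch; hypothetical names, not project code. */
    #include <ctype.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef enum { T_NUM, T_ADD, T_SUB, T_MUL, T_DIV, T_LPAREN, T_RPAREN, T_EOF } TokType;
    typedef enum { P_NONE, P_TERM, P_FACTOR, P_UNARY } Prec;

    static const char *src;   // cursor into the input
    static TokType cur;       // current token
    static double curval;     // value when cur == T_NUM

    static void advance(void) {
        while (isspace((unsigned char)*src)) src++;
        switch (*src) {
            case '\0': cur = T_EOF;           return;
            case '+':  src++; cur = T_ADD;    return;
            case '-':  src++; cur = T_SUB;    return;
            case '*':  src++; cur = T_MUL;    return;
            case '/':  src++; cur = T_DIV;    return;
            case '(':  src++; cur = T_LPAREN; return;
            case ')':  src++; cur = T_RPAREN; return;
            default: {
                char *end;
                curval = strtod(src, &end);
                src = end;
                cur = T_NUM;
                return;
            }
        }
    }

    static Prec prec_of(TokType t) {
        switch (t) {
            case T_ADD: case T_SUB: return P_TERM;
            case T_MUL: case T_DIV: return P_FACTOR;
            default:                return P_NONE;
        }
    }

    static double expr(Prec min_prec);

    // Prefix positions: numbers, unary minus, parenthesised groups.
    static double prefix(void) {
        if (cur == T_NUM)    { double v = curval; advance(); return v; }
        if (cur == T_SUB)    { advance(); return -expr(P_UNARY); }
        if (cur == T_LPAREN) {
            advance();
            double v = expr(P_TERM);
            if (cur != T_RPAREN) { fprintf(stderr, "expected ')'\n"); exit(1); }
            advance();
            return v;
        }
        fprintf(stderr, "expected expression\n");
        exit(1);
    }

    // The Pratt core, equivalent to parse_prec(): one prefix parse, then a loop
    // over infix operators that bind at least as tightly as min_prec.
    static double expr(Prec min_prec) {
        double lhs = prefix();
        while (prec_of(cur) != P_NONE && prec_of(cur) >= min_prec) {
            TokType op = cur;
            advance();
            // prec + 1 for the right operand keeps the operator left-associative,
            // the same trick parse_binary() uses with rule.precedence + 1.
            double rhs = expr((Prec)(prec_of(op) + 1));
            switch (op) {
                case T_ADD: lhs += rhs; break;
                case T_SUB: lhs -= rhs; break;
                case T_MUL: lhs *= rhs; break;
                case T_DIV: lhs /= rhs; break;
                default: break;
            }
        }
        return lhs;
    }

    int main(void) {
        src = "1 + 2 * 5 - 2";          // first line of tests/expressions.bad
        advance();
        printf("%g\n", expr(P_TERM));   // prints 9: parsed as (1 + (2 * 5)) - 2
        return 0;
    }

Compiled on its own this prints 9 for the first test line, which is the grouping the new precedence
table encodes; the committed parser only prints its "parsing ..." trace for now, since the emit*
calls are still marked TODO.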