From b86d262b9fe27131d8163a6ba49957736691b1d0 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sun, 16 Jun 2024 20:19:56 +0200 Subject: Add floating point number parsing --- Makefile | 2 +- src/badlib.h | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lexer.c | 16 +++++---- src/main.c | 45 ++++++++++++++++++-------- tests/expressions.bad | 4 ++- 5 files changed, 135 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index 35447d3..7fed16b 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ DOT := dot -Gmargin=0.7 -Gcolor=white -Gfontcolor=white \ -T png | kitty +kitten icat # Output executable. -TARGET := bdl +TARGET := badc BIN := $(BUILD_DIR)/$(TARGET) # Compiler and linker configuration. diff --git a/src/badlib.h b/src/badlib.h index 9802278..d334fa7 100644 --- a/src/badlib.h +++ b/src/badlib.h @@ -586,6 +586,9 @@ str_to_int(Str s) { s = str_remove_prefix(s, cstr("0b")); while (s.size) { char c = str_next(&s); + if (c == '_') { + continue; + } assert(c == '0' || c == '1'); num = num * 2 + (c - '0'); } @@ -594,6 +597,9 @@ str_to_int(Str s) { s = str_remove_prefix(s, cstr("0x")); while (s.size) { char c = str_next(&s); + if (c == '_') { + continue; + } assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); if (c >= '0' && c <= '9') { @@ -606,11 +612,95 @@ str_to_int(Str s) { } } else { // Decimal number. + char c = str_peek(s); + sz neg = 1; + if (c == '-') { + neg = -1; + str_next(&s); + } else if (c == '+') { + str_next(&s); + } + // TODO: check if it fits within the s64 range. while (s.size) { char c = str_next(&s); + if (c == '_') { + continue; + } assert(c >= '0' && c <= '9'); num = num * 10 + (c - '0'); } + num *= neg; + } + return num; +} + +f64 +str_to_float(Str s) { + char c = str_peek(s); + f64 neg = 1.0; + if (c == '-') { + neg = -1.0; + str_next(&s); + } else if (c == '+') { + str_next(&s); + } + f64 num = 0.0; + // Integral part. 
+ while (s.size) { + char c = str_next(&s); + if (c == '_') { + continue; + } + if (c == '.') { + break; + } + assert(c >= '0' && c <= '9'); + num = num * 10 + (c - '0'); + } + // Fractional part. + f64 frac = 0; + sz frac_digits = 1; + bool has_exponent = false; + while (s.size) { + char c = str_next(&s); + if (c == '_') { + continue; + } + if (c == 'e' || c == 'E') { + has_exponent = true; + break; + } + assert(c >= '0' && c <= '9'); + frac = frac * 10 + (c - '0'); + frac_digits *= 10; + } + num *= neg; + num += frac / frac_digits; + if (has_exponent) { + bool exp_neg = false; + char c = str_peek(s); + if (c == '-') { + exp_neg = true; + str_next(&s); + } else if (c == '+') { + str_next(&s); + } + sz exponent = 0; + while (s.size) { + c = str_next(&s); + if (c == '_') { + continue; + } + assert(c >= '0' && c <= '9'); + exponent = exponent * 10 + (c - '0'); + } + for (sz i = 0; i < exponent; i++) { + if (!exp_neg) { + num *= 10; + } else { + num /= 10; + } + } } return num; } diff --git a/src/lexer.c b/src/lexer.c index 404082b..997a9f3 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -14,7 +14,8 @@ typedef enum TokenKind { TOK_RCURLY, // } // Basic literals. - TOK_NUMBER, + TOK_NUM_INT, + TOK_NUM_FLOAT, TOK_SYMBOL, TOK_STRING, @@ -81,7 +82,8 @@ Str token_str[] = { [TOK_RCURLY] = cstr("RCURLY"), // Basic literals. 
+ - [TOK_NUMBER] = cstr("NUMBER"), + [TOK_NUM_INT] = cstr("INUMBER"), + [TOK_NUM_FLOAT] = cstr("FNUMBER"), [TOK_SYMBOL] = cstr("SYMBOL"), [TOK_STRING] = cstr("STRING"), @@ -304,7 +306,7 @@ emit_token_number(Scanner *scanner) { continue; } if (scan_is_valid_split(c)) { - return emit_token(current, scanner, TOK_NUMBER); + return emit_token(current, scanner, TOK_NUM_INT); } scan_skip_until_valid(scanner); return emit_token_err( @@ -321,7 +323,7 @@ emit_token_number(Scanner *scanner) { continue; } if (scan_is_valid_split(c)) { - return emit_token(current, scanner, TOK_NUMBER); + return emit_token(current, scanner, TOK_NUM_INT); } scan_skip_until_valid(scanner); return emit_token_err(&current, @@ -340,7 +342,7 @@ emit_token_number(Scanner *scanner) { continue; } if (scan_is_valid_split(c)) { - return emit_token(current, scanner, TOK_NUMBER); + return emit_token(current, scanner, TOK_NUM_INT); } scan_skip_until_valid(scanner); return emit_token_err(&current, cstr("malformed number")); @@ -362,7 +364,7 @@ emit_token_number(Scanner *scanner) { continue; } if (scan_is_valid_split(c)) { - return emit_token(current, scanner, TOK_NUMBER); + return emit_token(current, scanner, TOK_NUM_FLOAT); } scan_skip_until_valid(scanner); return emit_token_err(&current, cstr("malformed number")); @@ -385,7 +387,7 @@ emit_token_number(Scanner *scanner) { cstr("malformed number: decimals not allowed on exponent")); } if (scan_is_valid_split(c)) { - return emit_token(current, scanner, TOK_NUMBER); + return emit_token(current, scanner, TOK_NUM_FLOAT); } scan_skip_until_valid(scanner); return emit_token_err(&current, cstr("malformed number")); diff --git a/src/main.c b/src/main.c index c321d7c..f15afed 100644 --- a/src/main.c +++ b/src/main.c @@ -39,19 +39,27 @@ print_tokens(Str path, Token *tokens) { // typedef enum NodeKind { - NODE_NUMBER, + NODE_NUM_INT, + NODE_NUM_FLOAT, // TODO: probably want to handle ints/unsigneds/floats separately. 
NODE_ADD, NODE_SUB, NODE_DIV, NODE_MUL, - // TODO: MOD + NODE_MOD, } NodeKind; Str node_str[] = { - [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"), - [NODE_SUB] = cstr("SUB"), [NODE_DIV] = cstr("DIV"), + // Arithmetic. + [NODE_ADD] = cstr("ADD"), + [NODE_SUB] = cstr("SUB"), + [NODE_DIV] = cstr("DIV"), [NODE_MUL] = cstr("MUL"), + [NODE_MOD] = cstr("MOD"), + + // Literals. + [NODE_NUM_INT] = cstr("INT"), + [NODE_NUM_FLOAT] = cstr("FLOAT"), }; typedef struct Node { @@ -62,9 +70,7 @@ typedef struct Node { NodeKind kind; union { f64 d; - f32 f; sz i; - u64 u; } value; struct Node *left; struct Node *right; @@ -108,7 +114,7 @@ typedef enum { PREC_EQUALITY, // == != PREC_COMPARISON, // < > <= >= PREC_TERM, // + - - PREC_FACTOR, // * / + PREC_FACTOR, // * / % PREC_UNARY, // ! - PREC_CALL, // . () PREC_PRIMARY // highest precedence @@ -136,7 +142,9 @@ ParseRule parse_rules[] = { [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, - [TOK_NUMBER] = {parse_number, NULL, PREC_NONE}, + [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR}, + [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE}, + [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE}, [TOK_EOF] = {NULL, NULL, PREC_NONE}, }; @@ -201,7 +209,7 @@ parse_unary(Parser *parser) { print("parsing unary "); print_token(prev); #endif - TokenKind kind = parser->previous.kind; + TokenKind kind = prev.kind; parse_expr(parser, PREC_LOW); // TODO: ... 
switch (kind) { @@ -227,6 +235,7 @@ parse_binary(Parser *parser) { case TOK_SUB: node = node_alloc(NODE_SUB, prev, parser->storage); break; case TOK_MUL: node = node_alloc(NODE_MUL, prev, parser->storage); break; case TOK_DIV: node = node_alloc(NODE_DIV, prev, parser->storage); break; + case TOK_MOD: node = node_alloc(NODE_MOD, prev, parser->storage); break; default: { parse_emit_err(parser, prev, cstr("unreachable")); return; @@ -244,9 +253,18 @@ parse_number(Parser *parser) { print("parsing number "); print_token(prev); #endif - Node *node = node_alloc(NODE_NUMBER, prev, parser->storage); - node->value.i = str_to_int(prev.val); - // TODO: handle sign and/or floating point values. + Node *node = NULL; + switch (prev.kind) { + case TOK_NUM_INT: { + node = node_alloc(NODE_NUM_INT, prev, parser->storage); + node->value.i = str_to_int(prev.val); + } break; + case TOK_NUM_FLOAT: { + node = node_alloc(NODE_NUM_FLOAT, prev, parser->storage); + node->value.d = str_to_float(prev.val); + } break; + default: break; + } array_push(parser->nodes, node, parser->storage); } @@ -268,7 +286,8 @@ graph_node(Node *node) { print("%d [width=2.5,shape=Mrecord,label=\"", node->id); print(" %s ", node_str[node->kind]); switch (node->kind) { - case NODE_NUMBER: print("| Value: %d", node->value.i); break; + case NODE_NUM_INT: print("| Value: %d", node->value.i); break; + case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; default: break; } print("| Line: %d | Col: %d ", node->line, node->col); diff --git a/tests/expressions.bad b/tests/expressions.bad index af60b9b..5b114d1 100644 --- a/tests/expressions.bad +++ b/tests/expressions.bad @@ -1,7 +1,9 @@ -1 + 2 +1 + -2 1 + 2 * 3 1 + 2 * 3 - 4 1 + 2 * (3 - 4) + +1.0 - 1234.56e-3 -- cgit v1.2.1