aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bad Diode <bd@badd10de.dev> 2024-06-16 20:19:56 +0200
committer: Bad Diode <bd@badd10de.dev> 2024-06-16 20:19:56 +0200
commit: b86d262b9fe27131d8163a6ba49957736691b1d0 (patch)
tree: cde0152015c4cb2403d6b99b92dccaed46ad0f2f
parent: d6e8c277b13ee5a9878c78dd7d0104699202a161 (diff)
download: bdl-b86d262b9fe27131d8163a6ba49957736691b1d0.tar.gz
          bdl-b86d262b9fe27131d8163a6ba49957736691b1d0.zip
Add floating point number parsing
-rw-r--r-- Makefile 2
-rw-r--r-- src/badlib.h 90
-rw-r--r-- src/lexer.c 16
-rw-r--r-- src/main.c 45
-rw-r--r-- tests/expressions.bad 4
5 files changed, 135 insertions, 22 deletions
diff --git a/Makefile b/Makefile
index 35447d3..7fed16b 100644
--- a/Makefile
+++ b/Makefile
@@ -17,7 +17,7 @@ DOT := dot -Gmargin=0.7 -Gcolor=white -Gfontcolor=white \
17 -T png | kitty +kitten icat 17 -T png | kitty +kitten icat
18 18
19# Output executable. 19# Output executable.
20TARGET := bdl 20TARGET := badc
21BIN := $(BUILD_DIR)/$(TARGET) 21BIN := $(BUILD_DIR)/$(TARGET)
22 22
23# Compiler and linker configuration. 23# Compiler and linker configuration.
diff --git a/src/badlib.h b/src/badlib.h
index 9802278..d334fa7 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -586,6 +586,9 @@ str_to_int(Str s) {
586 s = str_remove_prefix(s, cstr("0b")); 586 s = str_remove_prefix(s, cstr("0b"));
587 while (s.size) { 587 while (s.size) {
588 char c = str_next(&s); 588 char c = str_next(&s);
589 if (c == '_') {
590 continue;
591 }
589 assert(c == '0' || c == '1'); 592 assert(c == '0' || c == '1');
590 num = num * 2 + (c - '0'); 593 num = num * 2 + (c - '0');
591 } 594 }
@@ -594,6 +597,9 @@ str_to_int(Str s) {
594 s = str_remove_prefix(s, cstr("0x")); 597 s = str_remove_prefix(s, cstr("0x"));
595 while (s.size) { 598 while (s.size) {
596 char c = str_next(&s); 599 char c = str_next(&s);
600 if (c == '_') {
601 continue;
602 }
597 assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || 603 assert((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
598 (c >= 'A' && c <= 'F')); 604 (c >= 'A' && c <= 'F'));
599 if (c >= '0' && c <= '9') { 605 if (c >= '0' && c <= '9') {
@@ -606,11 +612,95 @@ str_to_int(Str s) {
606 } 612 }
607 } else { 613 } else {
608 // Decimal number. 614 // Decimal number.
615 char c = str_peek(s);
616 sz neg = 1;
617 if (c == '-') {
618 neg = -1;
619 str_next(&s);
620 } else if (c == '+') {
621 str_next(&s);
622 }
623 // TODO: check if it fits within the s64 range.
609 while (s.size) { 624 while (s.size) {
610 char c = str_next(&s); 625 char c = str_next(&s);
626 if (c == '_') {
627 continue;
628 }
611 assert(c >= '0' && c <= '9'); 629 assert(c >= '0' && c <= '9');
612 num = num * 10 + (c - '0'); 630 num = num * 10 + (c - '0');
613 } 631 }
632 num *= neg;
633 }
634 return num;
635}
636
637f64
638str_to_float(Str s) {
639 char c = str_peek(s);
640 f64 neg = 1.0;
641 if (c == '-') {
642 neg = -1.0;
643 str_next(&s);
644 } else if (c == '+') {
645 str_next(&s);
646 }
647 f64 num = 0.0;
648 // Integral part.
649 while (s.size) {
650 char c = str_next(&s);
651 if (c == '_') {
652 continue;
653 }
654 if (c == '.') {
655 break;
656 }
657 assert(c >= '0' && c <= '9');
658 num = num * 10 + (c - '0');
659 }
660 // Fractional part.
661 f64 frac = 0;
662 sz frac_digits = 1;
663 bool has_exponent = false;
664 while (s.size) {
665 char c = str_next(&s);
666 if (c == '_') {
667 continue;
668 }
669 if (c == 'e' || c == 'E') {
670 has_exponent = true;
671 break;
672 }
673 assert(c >= '0' && c <= '9');
674 frac = frac * 10 + (c - '0');
675 frac_digits *= 10;
676 }
677 num *= neg;
678 num += frac / frac_digits;
679 if (has_exponent) {
680 bool exp_neg = false;
681 char c = str_peek(s);
682 if (c == '-') {
683 exp_neg = true;
684 str_next(&s);
685 } else if (c == '+') {
686 str_next(&s);
687 }
688 sz exponent = 0;
689 while (s.size) {
690 c = str_next(&s);
691 if (c == '_') {
692 continue;
693 }
694 assert(c >= '0' && c <= '9');
695 exponent = exponent * 10 + (c - '0');
696 }
697 for (sz i = 0; i < exponent; i++) {
698 if (!exp_neg) {
699 num *= 10;
700 } else {
701 num /= 10;
702 }
703 }
614 } 704 }
615 return num; 705 return num;
616} 706}
diff --git a/src/lexer.c b/src/lexer.c
index 404082b..997a9f3 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -14,7 +14,8 @@ typedef enum TokenKind {
14 TOK_RCURLY, // } 14 TOK_RCURLY, // }
15 15
16 // Basic literals. 16 // Basic literals.
17 TOK_NUMBER, 17 TOK_NUM_INT,
18 TOK_NUM_FLOAT,
18 TOK_SYMBOL, 19 TOK_SYMBOL,
19 TOK_STRING, 20 TOK_STRING,
20 21
@@ -81,7 +82,8 @@ Str token_str[] = {
81 [TOK_RCURLY] = cstr("RCURLY"), 82 [TOK_RCURLY] = cstr("RCURLY"),
82 83
83 // Basic literals. 84 // Basic literals.
84 [TOK_NUMBER] = cstr("NUMBER"), 85 [TOK_NUM_INT] = cstr("INUMBER"),
86 [TOK_NUM_FLOAT] = cstr("FNUMBER"),
85 [TOK_SYMBOL] = cstr("SYMBOL"), 87 [TOK_SYMBOL] = cstr("SYMBOL"),
86 [TOK_STRING] = cstr("STRING"), 88 [TOK_STRING] = cstr("STRING"),
87 89
@@ -304,7 +306,7 @@ emit_token_number(Scanner *scanner) {
304 continue; 306 continue;
305 } 307 }
306 if (scan_is_valid_split(c)) { 308 if (scan_is_valid_split(c)) {
307 return emit_token(current, scanner, TOK_NUMBER); 309 return emit_token(current, scanner, TOK_NUM_INT);
308 } 310 }
309 scan_skip_until_valid(scanner); 311 scan_skip_until_valid(scanner);
310 return emit_token_err( 312 return emit_token_err(
@@ -321,7 +323,7 @@ emit_token_number(Scanner *scanner) {
321 continue; 323 continue;
322 } 324 }
323 if (scan_is_valid_split(c)) { 325 if (scan_is_valid_split(c)) {
324 return emit_token(current, scanner, TOK_NUMBER); 326 return emit_token(current, scanner, TOK_NUM_INT);
325 } 327 }
326 scan_skip_until_valid(scanner); 328 scan_skip_until_valid(scanner);
327 return emit_token_err(&current, 329 return emit_token_err(&current,
@@ -340,7 +342,7 @@ emit_token_number(Scanner *scanner) {
340 continue; 342 continue;
341 } 343 }
342 if (scan_is_valid_split(c)) { 344 if (scan_is_valid_split(c)) {
343 return emit_token(current, scanner, TOK_NUMBER); 345 return emit_token(current, scanner, TOK_NUM_INT);
344 } 346 }
345 scan_skip_until_valid(scanner); 347 scan_skip_until_valid(scanner);
346 return emit_token_err(&current, cstr("malformed number")); 348 return emit_token_err(&current, cstr("malformed number"));
@@ -362,7 +364,7 @@ emit_token_number(Scanner *scanner) {
362 continue; 364 continue;
363 } 365 }
364 if (scan_is_valid_split(c)) { 366 if (scan_is_valid_split(c)) {
365 return emit_token(current, scanner, TOK_NUMBER); 367 return emit_token(current, scanner, TOK_NUM_FLOAT);
366 } 368 }
367 scan_skip_until_valid(scanner); 369 scan_skip_until_valid(scanner);
368 return emit_token_err(&current, cstr("malformed number")); 370 return emit_token_err(&current, cstr("malformed number"));
@@ -385,7 +387,7 @@ emit_token_number(Scanner *scanner) {
385 cstr("malformed number: decimals not allowed on exponent")); 387 cstr("malformed number: decimals not allowed on exponent"));
386 } 388 }
387 if (scan_is_valid_split(c)) { 389 if (scan_is_valid_split(c)) {
388 return emit_token(current, scanner, TOK_NUMBER); 390 return emit_token(current, scanner, TOK_NUM_FLOAT);
389 } 391 }
390 scan_skip_until_valid(scanner); 392 scan_skip_until_valid(scanner);
391 return emit_token_err(&current, cstr("malformed number")); 393 return emit_token_err(&current, cstr("malformed number"));
diff --git a/src/main.c b/src/main.c
index c321d7c..f15afed 100644
--- a/src/main.c
+++ b/src/main.c
@@ -39,19 +39,27 @@ print_tokens(Str path, Token *tokens) {
39// 39//
40 40
41typedef enum NodeKind { 41typedef enum NodeKind {
42 NODE_NUMBER, 42 NODE_NUM_INT,
43 NODE_NUM_FLOAT,
43 // TODO: probably want to handle ints/unsigneds/floats separately. 44 // TODO: probably want to handle ints/unsigneds/floats separately.
44 NODE_ADD, 45 NODE_ADD,
45 NODE_SUB, 46 NODE_SUB,
46 NODE_DIV, 47 NODE_DIV,
47 NODE_MUL, 48 NODE_MUL,
48 // TODO: MOD 49 NODE_MOD,
49} NodeKind; 50} NodeKind;
50 51
51Str node_str[] = { 52Str node_str[] = {
52 [NODE_NUMBER] = cstr("NUM"), [NODE_ADD] = cstr("ADD"), 53 // Arithmetic.
53 [NODE_SUB] = cstr("SUB"), [NODE_DIV] = cstr("DIV"), 54 [NODE_ADD] = cstr("ADD"),
55 [NODE_SUB] = cstr("SUB"),
56 [NODE_DIV] = cstr("DIV"),
54 [NODE_MUL] = cstr("MUL"), 57 [NODE_MUL] = cstr("MUL"),
58 [NODE_MOD] = cstr("MOD"),
59
60 // Literals.
61 [NODE_NUM_INT] = cstr("INT"),
62 [NODE_NUM_FLOAT] = cstr("FLOAT"),
55}; 63};
56 64
57typedef struct Node { 65typedef struct Node {
@@ -62,9 +70,7 @@ typedef struct Node {
62 NodeKind kind; 70 NodeKind kind;
63 union { 71 union {
64 f64 d; 72 f64 d;
65 f32 f;
66 sz i; 73 sz i;
67 u64 u;
68 } value; 74 } value;
69 struct Node *left; 75 struct Node *left;
70 struct Node *right; 76 struct Node *right;
@@ -108,7 +114,7 @@ typedef enum {
108 PREC_EQUALITY, // == != 114 PREC_EQUALITY, // == !=
109 PREC_COMPARISON, // < > <= >= 115 PREC_COMPARISON, // < > <= >=
110 PREC_TERM, // + - 116 PREC_TERM, // + -
111 PREC_FACTOR, // * / 117 PREC_FACTOR, // * / %
112 PREC_UNARY, // ! - 118 PREC_UNARY, // ! -
113 PREC_CALL, // . () 119 PREC_CALL, // . ()
114 PREC_PRIMARY // highest precedence 120 PREC_PRIMARY // highest precedence
@@ -136,7 +142,9 @@ ParseRule parse_rules[] = {
136 [TOK_ADD] = {NULL, parse_binary, PREC_TERM}, 142 [TOK_ADD] = {NULL, parse_binary, PREC_TERM},
137 [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR}, 143 [TOK_DIV] = {NULL, parse_binary, PREC_FACTOR},
138 [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR}, 144 [TOK_MUL] = {NULL, parse_binary, PREC_FACTOR},
139 [TOK_NUMBER] = {parse_number, NULL, PREC_NONE}, 145 [TOK_MOD] = {NULL, parse_binary, PREC_FACTOR},
146 [TOK_NUM_INT] = {parse_number, NULL, PREC_NONE},
147 [TOK_NUM_FLOAT] = {parse_number, NULL, PREC_NONE},
140 [TOK_EOF] = {NULL, NULL, PREC_NONE}, 148 [TOK_EOF] = {NULL, NULL, PREC_NONE},
141}; 149};
142 150
@@ -201,7 +209,7 @@ parse_unary(Parser *parser) {
201 print("parsing unary "); 209 print("parsing unary ");
202 print_token(prev); 210 print_token(prev);
203#endif 211#endif
204 TokenKind kind = parser->previous.kind; 212 TokenKind kind = prev.kind;
205 parse_expr(parser, PREC_LOW); 213 parse_expr(parser, PREC_LOW);
206 // TODO: ... 214 // TODO: ...
207 switch (kind) { 215 switch (kind) {
@@ -227,6 +235,7 @@ parse_binary(Parser *parser) {
227 case TOK_SUB: node = node_alloc(NODE_SUB, prev, parser->storage); break; 235 case TOK_SUB: node = node_alloc(NODE_SUB, prev, parser->storage); break;
228 case TOK_MUL: node = node_alloc(NODE_MUL, prev, parser->storage); break; 236 case TOK_MUL: node = node_alloc(NODE_MUL, prev, parser->storage); break;
229 case TOK_DIV: node = node_alloc(NODE_DIV, prev, parser->storage); break; 237 case TOK_DIV: node = node_alloc(NODE_DIV, prev, parser->storage); break;
238 case TOK_MOD: node = node_alloc(NODE_MOD, prev, parser->storage); break;
230 default: { 239 default: {
231 parse_emit_err(parser, prev, cstr("unreachable")); 240 parse_emit_err(parser, prev, cstr("unreachable"));
232 return; 241 return;
@@ -244,9 +253,18 @@ parse_number(Parser *parser) {
244 print("parsing number "); 253 print("parsing number ");
245 print_token(prev); 254 print_token(prev);
246#endif 255#endif
247 Node *node = node_alloc(NODE_NUMBER, prev, parser->storage); 256 Node *node = NULL;
248 node->value.i = str_to_int(prev.val); 257 switch (prev.kind) {
249 // TODO: handle sign and/or floating point values. 258 case TOK_NUM_INT: {
259 node = node_alloc(NODE_NUM_INT, prev, parser->storage);
260 node->value.i = str_to_int(prev.val);
261 } break;
262 case TOK_NUM_FLOAT: {
263 node = node_alloc(NODE_NUM_FLOAT, prev, parser->storage);
264 node->value.d = str_to_float(prev.val);
265 } break;
266 default: break;
267 }
250 array_push(parser->nodes, node, parser->storage); 268 array_push(parser->nodes, node, parser->storage);
251} 269}
252 270
@@ -268,7 +286,8 @@ graph_node(Node *node) {
268 print("%d [width=2.5,shape=Mrecord,label=\"", node->id); 286 print("%d [width=2.5,shape=Mrecord,label=\"", node->id);
269 print("<top> %s ", node_str[node->kind]); 287 print("<top> %s ", node_str[node->kind]);
270 switch (node->kind) { 288 switch (node->kind) {
271 case NODE_NUMBER: print("| Value: %d", node->value.i); break; 289 case NODE_NUM_INT: print("| Value: %d", node->value.i); break;
290 case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break;
272 default: break; 291 default: break;
273 } 292 }
274 print("| Line: %d | Col: %d ", node->line, node->col); 293 print("| Line: %d | Col: %d ", node->line, node->col);
diff --git a/tests/expressions.bad b/tests/expressions.bad
index af60b9b..5b114d1 100644
--- a/tests/expressions.bad
+++ b/tests/expressions.bad
@@ -1,7 +1,9 @@
11 + 2 11 + -2
2 2
31 + 2 * 3 31 + 2 * 3
4 4
51 + 2 * 3 - 4 51 + 2 * 3 - 4
6 6
71 + 2 * (3 - 4) 71 + 2 * (3 - 4)
8
91.0 - 1234.56e-3