diff options
author | Bad Diode <bd@badd10de.dev> | 2022-03-12 10:43:10 +0100 |
---|---|---|
committer | Bad Diode <bd@badd10de.dev> | 2022-03-12 10:43:10 +0100 |
commit | 9c047e9d7be4ffffd868922c6209f796699aefcf (patch) | |
tree | 5ada11c01e958430e6f1584157994f5ebaf5aef5 | |
parent | fa32ad3224b3e362e5f79eee8785334f4bebdbc8 (diff) | |
download | bdl-9c047e9d7be4ffffd868922c6209f796699aefcf.tar.gz bdl-9c047e9d7be4ffffd868922c6209f796699aefcf.zip |
Add number parsing
-rw-r--r-- | src/errors.c | 1 | ||||
-rw-r--r-- | src/errors.h | 1 | ||||
-rw-r--r-- | src/lexer.c | 8 | ||||
-rw-r--r-- | src/parser.c | 168 | ||||
-rw-r--r-- | src/parser.h | 16 |
5 files changed, 101 insertions, 93 deletions
diff --git a/src/errors.c b/src/errors.c index efc834f..254ff0a 100644 --- a/src/errors.c +++ b/src/errors.c | |||
@@ -5,6 +5,7 @@ static const char* error_msgs[] = { | |||
5 | [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter", | 5 | [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter", |
6 | [ERR_UNKNOWN_TOK_TYPE] = "error: unknown token type", | 6 | [ERR_UNKNOWN_TOK_TYPE] = "error: unknown token type", |
7 | [ERR_MALFORMED_NUMBER] = "error: malformed number token", | 7 | [ERR_MALFORMED_NUMBER] = "error: malformed number token", |
8 | [ERR_UNIMPLEMENTED] = "error: not implemented", | ||
8 | }; | 9 | }; |
9 | 10 | ||
10 | static Error current_error = {.value = ERR_OK}; | 11 | static Error current_error = {.value = ERR_OK}; |
diff --git a/src/errors.h b/src/errors.h index 8a378a2..d66de46 100644 --- a/src/errors.h +++ b/src/errors.h | |||
@@ -13,6 +13,7 @@ typedef enum ErrorValue { | |||
13 | ERR_UNMATCHED_STRING, | 13 | ERR_UNMATCHED_STRING, |
14 | ERR_UNKNOWN_TOK_TYPE, | 14 | ERR_UNKNOWN_TOK_TYPE, |
15 | ERR_MALFORMED_NUMBER, | 15 | ERR_MALFORMED_NUMBER, |
16 | ERR_UNIMPLEMENTED, | ||
16 | ERR_OK, | 17 | ERR_OK, |
17 | } ErrorValue; | 18 | } ErrorValue; |
18 | 19 | ||
diff --git a/src/lexer.c b/src/lexer.c index f63ff4f..ddf5d81 100644 --- a/src/lexer.c +++ b/src/lexer.c | |||
@@ -223,10 +223,10 @@ tokenize(const StringView *sv) { | |||
223 | break; | 223 | break; |
224 | } | 224 | } |
225 | size_t n = 1; | 225 | size_t n = 1; |
226 | bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); | 226 | bool num = c == '-' && !is_delimiter(scan_peek(&scanner)); |
227 | is_number = c == '+' && !is_delimiter(scan_peek(&scanner)); | 227 | num = num || (c == '+' && !is_delimiter(scan_peek(&scanner))); |
228 | is_number = is_number || (c >= '0' && c <= '9'); | 228 | num = num || (c >= '0' && c <= '9'); |
229 | if (is_number) { | 229 | if (num) { |
230 | while (!is_delimiter(scan_peek(&scanner))) { | 230 | while (!is_delimiter(scan_peek(&scanner))) { |
231 | c = scan_next(&scanner); | 231 | c = scan_next(&scanner); |
232 | n++; | 232 | n++; |
diff --git a/src/parser.c b/src/parser.c index dfc3e56..66f7114 100644 --- a/src/parser.c +++ b/src/parser.c | |||
@@ -19,108 +19,114 @@ has_next(Parser *parser) { | |||
19 | Node | 19 | Node |
20 | parse_number(Parser *parser) { | 20 | parse_number(Parser *parser) { |
21 | Token tok = next_token(parser); | 21 | Token tok = next_token(parser); |
22 | // TODO: do the dance. | ||
23 | // if error: | ||
24 | // return (Node){.type = NODE_ERR}; | ||
25 | 22 | ||
26 | // size_t | 23 | bool negative = false; |
27 | // scan_number_token(Scanner *scanner) { | 24 | int base = 10; |
28 | // // TODO: This looks like more a parsing problem than lexer, | 25 | char c = sv_next(&tok.value); |
29 | // // consider moving it there. If starts with `-` and there is no | 26 | if (c == '-') { |
30 | // // delimiter after, or if it starts with a number, it is | 27 | negative = true; |
31 | // // TOKEN_NUMBER. | 28 | c = sv_next(&tok.value); |
32 | // char first = scan_next(scanner); | 29 | } |
33 | // char second = scan_peek(scanner); | 30 | if (c == '+') { |
34 | // size_t n = 1; | 31 | c = sv_next(&tok.value); |
35 | // if (first == '0' && !is_delimiter(second)) { | 32 | } |
36 | // if (second == 'x') { | 33 | if (c == '0') { |
37 | // // Hex constant. | 34 | c = sv_next(&tok.value); |
38 | // scan_next(scanner); | 35 | if (c == 'x') { |
39 | // n++; | 36 | base = 16; |
40 | // if (is_delimiter(scan_peek(scanner))) { | 37 | c = sv_next(&tok.value); |
41 | // return 0; | 38 | } else if (c == 'b') { |
42 | // } | 39 | base = 2; |
43 | // while (!is_delimiter(scan_peek(scanner))) { | 40 | c = sv_next(&tok.value); |
44 | // char c = scan_next(scanner); | 41 | } else if (!(c >= '0' && c <= '9')){ |
45 | // if (!(c >= '0' && c <= '9') && | 42 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); |
46 | // !(c >= 'a' && c <= 'f') && | 43 | return (Node){.type = NODE_ERR}; |
47 | // !(c >= 'A' && c <= 'F')) { | 44 | } |
48 | // return 0; | 45 | } |
49 | // } | ||
50 | // n++; | ||
51 | // } | ||
52 | // return n; | ||
53 | // } else if (second == 'b') { | ||
54 | // // Binary constant. | ||
55 | // scan_next(scanner); | ||
56 | // n++; | ||
57 | // if (is_delimiter(scan_peek(scanner))) { | ||
58 | // return 0; | ||
59 | // } | ||
60 | // while (!is_delimiter(scan_peek(scanner))) { | ||
61 | // char c = scan_next(scanner); | ||
62 | // if (!(c == '0' || c == '1')) { | ||
63 | // return 0; | ||
64 | // } | ||
65 | // n++; | ||
66 | // } | ||
67 | // } | ||
68 | // } | ||
69 | 46 | ||
70 | // // Decimal number or floating point. | 47 | // Integral part. |
71 | // bool has_dot = false; | 48 | u64 integral = 0; |
72 | // while (!is_delimiter(scan_peek(scanner))) { | 49 | while (c != '\0') { |
73 | // char c = scan_next(scanner); | 50 | ssize_t current = 0; |
74 | // if (c == '.') { | 51 | if (c >= 'a' && c <= 'z' && base == 16) { |
75 | // if (has_dot) { | 52 | current = (c - 'a') + 10; |
76 | // return 0; | 53 | } else if (c >= 'A' && c <= 'Z' && base == 16) { |
77 | // } | 54 | current = (c - 'A') + 10; |
78 | // has_dot = true; | 55 | } else if (c >= '0' && c <= '9') { |
79 | // } else if (!(c >= '0' && c <= '9')) { | 56 | current = (c - '0'); |
80 | // return 0; | 57 | } else if (c == '.') { |
81 | // } | 58 | c = sv_next(&tok.value); |
82 | // n++; | 59 | break; |
83 | // } | 60 | } else { |
84 | // return n; | 61 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); |
85 | // } | 62 | return (Node){.type = NODE_ERR}; |
86 | return (Node){.type = NODE_NUMBER, .string = 53}; | 63 | } |
64 | integral = integral * base + current; | ||
65 | c = sv_next(&tok.value); | ||
66 | } | ||
67 | |||
68 | // Fractional part. | ||
69 | u64 fractional = 0; | ||
70 | while (c != '\0') { | ||
71 | ssize_t current = 0; | ||
72 | if (c >= 'a' && c <= 'z' && base == 16) { | ||
73 | current = (c - 'a') + 10; | ||
74 | } else if (c >= 'A' && c <= 'Z' && base == 16) { | ||
75 | current = (c - 'A') + 10; | ||
76 | } else if (c >= '0' && c <= '9') { | ||
77 | current = (c - '0'); | ||
78 | } else { | ||
79 | push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col); | ||
80 | return (Node){.type = NODE_ERR}; | ||
81 | } | ||
82 | fractional = fractional * base + current; | ||
83 | c = sv_next(&tok.value); | ||
84 | } | ||
85 | |||
86 | return (Node){ | ||
87 | .type = NODE_NUMBER, | ||
88 | .number.negative = negative, | ||
89 | .number.integral = integral, | ||
90 | .number.fractional = fractional, | ||
91 | }; | ||
87 | } | 92 | } |
88 | 93 | ||
89 | Node | 94 | Node |
90 | parse_next(Parser *parser) { | 95 | parse_next(Parser *parser) { |
91 | if (!has_next(parser)) { | ||
92 | return; | ||
93 | } | ||
94 | |||
95 | Token tok = peek_token(parser); | 96 | Token tok = peek_token(parser); |
96 | switch (tok.type) { | 97 | switch (tok.type) { |
97 | case TOKEN_NUMBER: { | 98 | case TOKEN_NUMBER: { |
98 | return parse_number(parser); | 99 | return parse_number(parser); |
99 | } break; | 100 | } break; |
100 | case TOKEN_STRING: { | 101 | case TOKEN_STRING: { |
101 | // printf("STRING!\n"); | 102 | // TODO: return parse_string(parser); |
102 | next_token(parser); // FIXME: <==== | 103 | next_token(parser); |
103 | return (Node){.type = NODE_STRING, .string = tok.value}; | 104 | return (Node){.type = NODE_STRING, .string = tok.value}; |
104 | // TODO: parse_string(parser); | ||
105 | } break; | 105 | } break; |
106 | case TOKEN_LPAREN: { | 106 | case TOKEN_LPAREN: { |
107 | // printf("LPAREN OH MY!\n"); | 107 | // TODO: return parse_paren(parser); |
108 | // TODO: parse_list(parser); | 108 | push_error(ERR_TYPE_PARSER, ERR_UNIMPLEMENTED, tok.line, tok.col); |
109 | return (Node){.type = NODE_ERR}; | ||
109 | } break; | 110 | } break; |
110 | default: { | 111 | default: { |
111 | // printf("OH OHHHH\n"); | 112 | push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col); |
112 | // ... | 113 | return (Node){.type = NODE_ERR}; |
113 | } break; | 114 | } break; |
114 | } | 115 | } |
115 | next_token(parser); // FIXME: <==== | ||
116 | // TODO: this should be an error | ||
117 | } | 116 | } |
118 | 117 | ||
119 | void | 118 | void |
120 | print_node(Node node) { | 119 | print_node(Node node) { |
121 | switch (node.type) { | 120 | switch (node.type) { |
122 | case NODE_NUMBER: { | 121 | case NODE_NUMBER: { |
123 | printf("%ld\n", node.number); | 122 | if (node.number.negative) { |
123 | printf("-"); | ||
124 | } | ||
125 | if (node.number.fractional != 0) { | ||
126 | printf("%zu.%zu\n", node.number.integral, node.number.fractional); | ||
127 | } else { | ||
128 | printf("%zu\n", node.number.integral); | ||
129 | } | ||
124 | } break; | 130 | } break; |
125 | case NODE_STRING: { | 131 | case NODE_STRING: { |
126 | sv_write(&node.string); | 132 | sv_write(&node.string); |
@@ -138,13 +144,17 @@ parse(Token *tokens) { | |||
138 | .tokens = tokens, | 144 | .tokens = tokens, |
139 | .current = 0, | 145 | .current = 0, |
140 | }; | 146 | }; |
147 | |||
148 | // DEBUG: TOKENS | ||
149 | for (size_t i = 0; i < array_size(tokens); i++) { | ||
150 | print_token(tokens[i]); | ||
151 | } | ||
152 | |||
141 | while (has_next(&parser)) { | 153 | while (has_next(&parser)) { |
142 | Node node = parse_next(&parser); | 154 | Node node = parse_next(&parser); |
143 | if (node.type == NODE_ERR) { | 155 | if (node.type == NODE_ERR) { |
144 | return; | 156 | return; |
145 | } | 157 | } |
146 | print_node(node); | 158 | print_node(node); |
147 | // Token tok = next_token(&parser); | ||
148 | // print_token(tok); | ||
149 | } | 159 | } |
150 | } | 160 | } |
diff --git a/src/parser.h b/src/parser.h index 3e016d3..21a2711 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -27,16 +27,12 @@ typedef struct Node { | |||
27 | NodeType type; | 27 | NodeType type; |
28 | 28 | ||
29 | union { | 29 | union { |
30 | // Integer numbers. | 30 | // Numbers. |
31 | // u64 as_u64; | 31 | struct { |
32 | // u32 as_u32; | 32 | bool negative; |
33 | // u16 as_u16; | 33 | u64 integral; |
34 | // u8 as_u8; | 34 | u64 fractional; |
35 | // s64 as_s64; | 35 | } number; |
36 | // s32 as_s32; | ||
37 | // s16 as_s16; | ||
38 | // s8 as_s8; | ||
39 | s64 number; | ||
40 | 36 | ||
41 | // String. | 37 | // String. |
42 | StringView string; | 38 | StringView string; |