aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorBad Diode <bd@badd10de.dev>2022-03-12 10:43:10 +0100
committerBad Diode <bd@badd10de.dev>2022-03-12 10:43:10 +0100
commit9c047e9d7be4ffffd868922c6209f796699aefcf (patch)
tree5ada11c01e958430e6f1584157994f5ebaf5aef5 /src
parentfa32ad3224b3e362e5f79eee8785334f4bebdbc8 (diff)
downloadbdl-9c047e9d7be4ffffd868922c6209f796699aefcf.tar.gz
bdl-9c047e9d7be4ffffd868922c6209f796699aefcf.zip
Add number parsing
Diffstat (limited to 'src')
-rw-r--r--src/errors.c1
-rw-r--r--src/errors.h1
-rw-r--r--src/lexer.c8
-rw-r--r--src/parser.c168
-rw-r--r--src/parser.h16
5 files changed, 101 insertions, 93 deletions
diff --git a/src/errors.c b/src/errors.c
index efc834f..254ff0a 100644
--- a/src/errors.c
+++ b/src/errors.c
@@ -5,6 +5,7 @@ static const char* error_msgs[] = {
5 [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter", 5 [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter",
6 [ERR_UNKNOWN_TOK_TYPE] = "error: unknown token type", 6 [ERR_UNKNOWN_TOK_TYPE] = "error: unknown token type",
7 [ERR_MALFORMED_NUMBER] = "error: malformed number token", 7 [ERR_MALFORMED_NUMBER] = "error: malformed number token",
8 [ERR_UNIMPLEMENTED] = "error: not implemented",
8}; 9};
9 10
10static Error current_error = {.value = ERR_OK}; 11static Error current_error = {.value = ERR_OK};
diff --git a/src/errors.h b/src/errors.h
index 8a378a2..d66de46 100644
--- a/src/errors.h
+++ b/src/errors.h
@@ -13,6 +13,7 @@ typedef enum ErrorValue {
13 ERR_UNMATCHED_STRING, 13 ERR_UNMATCHED_STRING,
14 ERR_UNKNOWN_TOK_TYPE, 14 ERR_UNKNOWN_TOK_TYPE,
15 ERR_MALFORMED_NUMBER, 15 ERR_MALFORMED_NUMBER,
16 ERR_UNIMPLEMENTED,
16 ERR_OK, 17 ERR_OK,
17} ErrorValue; 18} ErrorValue;
18 19
diff --git a/src/lexer.c b/src/lexer.c
index f63ff4f..ddf5d81 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -223,10 +223,10 @@ tokenize(const StringView *sv) {
223 break; 223 break;
224 } 224 }
225 size_t n = 1; 225 size_t n = 1;
226 bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner)); 226 bool num = c == '-' && !is_delimiter(scan_peek(&scanner));
227 is_number = c == '+' && !is_delimiter(scan_peek(&scanner)); 227 num = num || (c == '+' && !is_delimiter(scan_peek(&scanner)));
228 is_number = is_number || (c >= '0' && c <= '9'); 228 num = num || (c >= '0' && c <= '9');
229 if (is_number) { 229 if (num) {
230 while (!is_delimiter(scan_peek(&scanner))) { 230 while (!is_delimiter(scan_peek(&scanner))) {
231 c = scan_next(&scanner); 231 c = scan_next(&scanner);
232 n++; 232 n++;
diff --git a/src/parser.c b/src/parser.c
index dfc3e56..66f7114 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -19,108 +19,114 @@ has_next(Parser *parser) {
19Node 19Node
20parse_number(Parser *parser) { 20parse_number(Parser *parser) {
21 Token tok = next_token(parser); 21 Token tok = next_token(parser);
22 // TODO: do the dance.
23 // if error:
24 // return (Node){.type = NODE_ERR};
25 22
26 // size_t 23 bool negative = false;
27 // scan_number_token(Scanner *scanner) { 24 int base = 10;
28 // // TODO: This looks like more a parsing problem than lexer, 25 char c = sv_next(&tok.value);
29 // // consider moving it there. If starts with `-` and there is no 26 if (c == '-') {
30 // // delimiter after, or if it starts with a number, it is 27 negative = true;
31 // // TOKEN_NUMBER. 28 c = sv_next(&tok.value);
32 // char first = scan_next(scanner); 29 }
33 // char second = scan_peek(scanner); 30 if (c == '+') {
34 // size_t n = 1; 31 c = sv_next(&tok.value);
35 // if (first == '0' && !is_delimiter(second)) { 32 }
36 // if (second == 'x') { 33 if (c == '0') {
37 // // Hex constant. 34 c = sv_next(&tok.value);
38 // scan_next(scanner); 35 if (c == 'x') {
39 // n++; 36 base = 16;
40 // if (is_delimiter(scan_peek(scanner))) { 37 c = sv_next(&tok.value);
41 // return 0; 38 } else if (c == 'b') {
42 // } 39 base = 2;
43 // while (!is_delimiter(scan_peek(scanner))) { 40 c = sv_next(&tok.value);
44 // char c = scan_next(scanner); 41 } else if (!(c >= '0' && c <= '9')){
45 // if (!(c >= '0' && c <= '9') && 42 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
46 // !(c >= 'a' && c <= 'f') && 43 return (Node){.type = NODE_ERR};
47 // !(c >= 'A' && c <= 'F')) { 44 }
48 // return 0; 45 }
49 // }
50 // n++;
51 // }
52 // return n;
53 // } else if (second == 'b') {
54 // // Binary constant.
55 // scan_next(scanner);
56 // n++;
57 // if (is_delimiter(scan_peek(scanner))) {
58 // return 0;
59 // }
60 // while (!is_delimiter(scan_peek(scanner))) {
61 // char c = scan_next(scanner);
62 // if (!(c == '0' || c == '1')) {
63 // return 0;
64 // }
65 // n++;
66 // }
67 // }
68 // }
69 46
70 // // Decimal number or floating point. 47 // Integral part.
71 // bool has_dot = false; 48 u64 integral = 0;
72 // while (!is_delimiter(scan_peek(scanner))) { 49 while (c != '\0') {
73 // char c = scan_next(scanner); 50 ssize_t current = 0;
74 // if (c == '.') { 51 if (c >= 'a' && c <= 'z' && base == 16) {
75 // if (has_dot) { 52 current = (c - 'a') + 10;
76 // return 0; 53 } else if (c >= 'A' && c <= 'Z' && base == 16) {
77 // } 54 current = (c - 'A') + 10;
78 // has_dot = true; 55 } else if (c >= '0' && c <= '9') {
79 // } else if (!(c >= '0' && c <= '9')) { 56 current = (c - '0');
80 // return 0; 57 } else if (c == '.') {
81 // } 58 c = sv_next(&tok.value);
82 // n++; 59 break;
83 // } 60 } else {
84 // return n; 61 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
85 // } 62 return (Node){.type = NODE_ERR};
86 return (Node){.type = NODE_NUMBER, .string = 53}; 63 }
64 integral = integral * base + current;
65 c = sv_next(&tok.value);
66 }
67
68 // Fractional part.
69 u64 fractional = 0;
70 while (c != '\0') {
71 ssize_t current = 0;
72 if (c >= 'a' && c <= 'z' && base == 16) {
73 current = (c - 'a') + 10;
74 } else if (c >= 'A' && c <= 'Z' && base == 16) {
75 current = (c - 'A') + 10;
76 } else if (c >= '0' && c <= '9') {
77 current = (c - '0');
78 } else {
79 push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
80 return (Node){.type = NODE_ERR};
81 }
82 fractional = fractional * base + current;
83 c = sv_next(&tok.value);
84 }
85
86 return (Node){
87 .type = NODE_NUMBER,
88 .number.negative = negative,
89 .number.integral = integral,
90 .number.fractional = fractional,
91 };
87} 92}
88 93
89Node 94Node
90parse_next(Parser *parser) { 95parse_next(Parser *parser) {
91 if (!has_next(parser)) {
92 return;
93 }
94
95 Token tok = peek_token(parser); 96 Token tok = peek_token(parser);
96 switch (tok.type) { 97 switch (tok.type) {
97 case TOKEN_NUMBER: { 98 case TOKEN_NUMBER: {
98 return parse_number(parser); 99 return parse_number(parser);
99 } break; 100 } break;
100 case TOKEN_STRING: { 101 case TOKEN_STRING: {
101 // printf("STRING!\n"); 102 // TODO: return parse_string(parser);
102 next_token(parser); // FIXME: <==== 103 next_token(parser);
103 return (Node){.type = NODE_STRING, .string = tok.value}; 104 return (Node){.type = NODE_STRING, .string = tok.value};
104 // TODO: parse_string(parser);
105 } break; 105 } break;
106 case TOKEN_LPAREN: { 106 case TOKEN_LPAREN: {
107 // printf("LPAREN OH MY!\n"); 107 // TODO: return parse_paren(parser);
108 // TODO: parse_list(parser); 108 push_error(ERR_TYPE_PARSER, ERR_UNIMPLEMENTED, tok.line, tok.col);
109 return (Node){.type = NODE_ERR};
109 } break; 110 } break;
110 default: { 111 default: {
111 // printf("OH OHHHH\n"); 112 push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col);
112 // ... 113 return (Node){.type = NODE_ERR};
113 } break; 114 } break;
114 } 115 }
115 next_token(parser); // FIXME: <====
116 // TODO: this should be an error
117} 116}
118 117
119void 118void
120print_node(Node node) { 119print_node(Node node) {
121 switch (node.type) { 120 switch (node.type) {
122 case NODE_NUMBER: { 121 case NODE_NUMBER: {
123 printf("%ld\n", node.number); 122 if (node.number.negative) {
123 printf("-");
124 }
125 if (node.number.fractional != 0) {
126 printf("%zu.%zu\n", node.number.integral, node.number.fractional);
127 } else {
128 printf("%zu\n", node.number.integral);
129 }
124 } break; 130 } break;
125 case NODE_STRING: { 131 case NODE_STRING: {
126 sv_write(&node.string); 132 sv_write(&node.string);
@@ -138,13 +144,17 @@ parse(Token *tokens) {
138 .tokens = tokens, 144 .tokens = tokens,
139 .current = 0, 145 .current = 0,
140 }; 146 };
147
148 // DEBUG: TOKENS
149 for (size_t i = 0; i < array_size(tokens); i++) {
150 print_token(tokens[i]);
151 }
152
141 while (has_next(&parser)) { 153 while (has_next(&parser)) {
142 Node node = parse_next(&parser); 154 Node node = parse_next(&parser);
143 if (node.type == NODE_ERR) { 155 if (node.type == NODE_ERR) {
144 return; 156 return;
145 } 157 }
146 print_node(node); 158 print_node(node);
147 // Token tok = next_token(&parser);
148 // print_token(tok);
149 } 159 }
150} 160}
diff --git a/src/parser.h b/src/parser.h
index 3e016d3..21a2711 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -27,16 +27,12 @@ typedef struct Node {
27 NodeType type; 27 NodeType type;
28 28
29 union { 29 union {
30 // Integer numbers. 30 // Numbers.
31 // u64 as_u64; 31 struct {
32 // u32 as_u32; 32 bool negative;
33 // u16 as_u16; 33 u64 integral;
34 // u8 as_u8; 34 u64 fractional;
35 // s64 as_s64; 35 } number;
36 // s32 as_s32;
37 // s16 as_s16;
38 // s8 as_s8;
39 s64 number;
40 36
41 // String. 37 // String.
42 StringView string; 38 StringView string;