From 9c047e9d7be4ffffd868922c6209f796699aefcf Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Sat, 12 Mar 2022 10:43:10 +0100
Subject: Add number parsing

---
 src/errors.c |   1 +
 src/errors.h |   1 +
 src/lexer.c  |   8 +--
 src/parser.c | 179 ++++++++++++++++++++++++++++++++++-------------------------
 src/parser.h |  16 +++---
 5 files changed, 112 insertions(+), 93 deletions(-)

(limited to 'src')

diff --git a/src/errors.c b/src/errors.c
index efc834f..254ff0a 100644
--- a/src/errors.c
+++ b/src/errors.c
@@ -5,6 +5,7 @@ static const char* error_msgs[] = {
     [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter",
     [ERR_UNKNOWN_TOK_TYPE] = "error: unknown token type",
     [ERR_MALFORMED_NUMBER] = "error: malformed number token",
+    [ERR_UNIMPLEMENTED] = "error: not implemented",
 };
 
 static Error current_error = {.value = ERR_OK};
diff --git a/src/errors.h b/src/errors.h
index 8a378a2..d66de46 100644
--- a/src/errors.h
+++ b/src/errors.h
@@ -13,6 +13,7 @@ typedef enum ErrorValue {
     ERR_UNMATCHED_STRING,
     ERR_UNKNOWN_TOK_TYPE,
     ERR_MALFORMED_NUMBER,
+    ERR_UNIMPLEMENTED,
     ERR_OK,
 } ErrorValue;
 
diff --git a/src/lexer.c b/src/lexer.c
index f63ff4f..ddf5d81 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -223,10 +223,10 @@ tokenize(const StringView *sv) {
                     break;
                 }
                 size_t n = 1;
-                bool is_number = c == '-' && !is_delimiter(scan_peek(&scanner));
-                is_number = c == '+' && !is_delimiter(scan_peek(&scanner));
-                is_number = is_number || (c >= '0' && c <= '9');
-                if (is_number) {
+                bool num = c == '-' && !is_delimiter(scan_peek(&scanner));
+                num = num || (c == '+' && !is_delimiter(scan_peek(&scanner)));
+                num = num || (c >= '0' && c <= '9');
+                if (num) {
                     while (!is_delimiter(scan_peek(&scanner))) {
                         c = scan_next(&scanner);
                         n++;
diff --git a/src/parser.c b/src/parser.c
index dfc3e56..66f7114 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -19,108 +19,125 @@ has_next(Parser *parser) {
 Node
 parse_number(Parser *parser) {
     Token tok = next_token(parser);
-    // TODO: do the dance.
-    // if error:
-    // return (Node){.type = NODE_ERR};
 
-    // size_t
-    // scan_number_token(Scanner *scanner) {
-    //     // TODO: This looks like more a parsing problem than lexer,
-    //     // consider moving it there. If starts with `-` and there is no
-    //     // delimiter after, or if it starts with a number, it is
-    //     // TOKEN_NUMBER.
-    //     char first = scan_next(scanner);
-    //     char second = scan_peek(scanner);
-    //     size_t n = 1;
-    //     if (first == '0' && !is_delimiter(second)) {
-    //         if (second == 'x') {
-    //             // Hex constant.
-    //             scan_next(scanner);
-    //             n++;
-    //             if (is_delimiter(scan_peek(scanner))) {
-    //                 return 0;
-    //             }
-    //             while (!is_delimiter(scan_peek(scanner))) {
-    //                 char c = scan_next(scanner);
-    //                 if (!(c >= '0' && c <= '9') &&
-    //                     !(c >= 'a' && c <= 'f') &&
-    //                     !(c >= 'A' && c <= 'F')) {
-    //                     return 0;
-    //                 }
-    //                 n++;
-    //             }
-    //             return n;
-    //         } else if (second == 'b') {
-    //             // Binary constant.
-    //             scan_next(scanner);
-    //             n++;
-    //             if (is_delimiter(scan_peek(scanner))) {
-    //                 return 0;
-    //             }
-    //             while (!is_delimiter(scan_peek(scanner))) {
-    //                 char c = scan_next(scanner);
-    //                 if (!(c == '0' || c == '1')) {
-    //                     return 0;
-    //                 }
-    //                 n++;
-    //             }
-    //         }
-    //     }
+    // Parses [+|-] [0x|0b] digits [. digits] out of the token text.
+    // Assumes sv_next() returns '\0' once the view is exhausted.
+    bool negative = false;
+    int base = 10;
+    char c = sv_next(&tok.value);
+    // At most one sign is accepted: "-+1" is malformed.
+    if (c == '-') {
+        negative = true;
+        c = sv_next(&tok.value);
+    } else if (c == '+') {
+        c = sv_next(&tok.value);
+    }
+    // Optional base prefix; a lone "0" or "0.<digits>" stays decimal.
+    if (c == '0') {
+        c = sv_next(&tok.value);
+        if (c == 'x') {
+            base = 16;
+            c = sv_next(&tok.value);
+        } else if (c == 'b') {
+            base = 2;
+            c = sv_next(&tok.value);
+        } else if (c != '\0' && c != '.' && !(c >= '0' && c <= '9')) {
+            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
+            return (Node){.type = NODE_ERR};
+        }
+    }
 
-    //     // Decimal number or floating point.
-    //     bool has_dot = false;
-    //     while (!is_delimiter(scan_peek(scanner))) {
-    //         char c = scan_next(scanner);
-    //         if (c == '.') {
-    //             if (has_dot) {
-    //                 return 0;
-    //             }
-    //             has_dot = true;
-    //         } else if (!(c >= '0' && c <= '9')) {
-    //             return 0;
-    //         }
-    //         n++;
-    //     }
-    //     return n;
-    // }
-    return (Node){.type = NODE_NUMBER, .string = 53};
+    // Integral part.
+    u64 integral = 0;
+    while (c != '\0') {
+        ssize_t current = 0;
+        if (c >= 'a' && c <= 'f' && base == 16) {
+            current = (c - 'a') + 10;
+        } else if (c >= 'A' && c <= 'F' && base == 16) {
+            current = (c - 'A') + 10;
+        } else if (c >= '0' && c <= '9' && (c - '0') < base) {
+            current = (c - '0');
+        } else if (c == '.') {
+            c = sv_next(&tok.value);
+            break;
+        } else {
+            // Not a digit of the current base (e.g. `0b12`, `0xg`).
+            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
+            return (Node){.type = NODE_ERR};
+        }
+        integral = integral * base + current;
+        c = sv_next(&tok.value);
+    }
+
+    // Fractional part.
+    // FIXME: stored as a plain integer, so leading zeros are lost
+    // ("1.05" and "1.5" yield the same fractional value).
+    u64 fractional = 0;
+    while (c != '\0') {
+        ssize_t current = 0;
+        if (c >= 'a' && c <= 'f' && base == 16) {
+            current = (c - 'a') + 10;
+        } else if (c >= 'A' && c <= 'F' && base == 16) {
+            current = (c - 'A') + 10;
+        } else if (c >= '0' && c <= '9' && (c - '0') < base) {
+            current = (c - '0');
+        } else {
+            push_error(ERR_TYPE_PARSER, ERR_MALFORMED_NUMBER, tok.line, tok.col);
+            return (Node){.type = NODE_ERR};
+        }
+        fractional = fractional * base + current;
+        c = sv_next(&tok.value);
+    }
+
+    return (Node){
+        .type = NODE_NUMBER,
+        .number.negative = negative,
+        .number.integral = integral,
+        .number.fractional = fractional,
+    };
 }
 
 Node
 parse_next(Parser *parser) {
-    if (!has_next(parser)) {
-        return;
-    }
-
+    // NOTE: no has_next() guard here; parse() checks before calling.
     Token tok = peek_token(parser);
     switch (tok.type) {
         case TOKEN_NUMBER: {
             return parse_number(parser);
         } break;
         case TOKEN_STRING: {
-            // printf("STRING!\n");
-            next_token(parser); // FIXME: <====
+            // TODO: return parse_string(parser);
+            next_token(parser);
             return (Node){.type = NODE_STRING, .string = tok.value};
-            // TODO: parse_string(parser);
         } break;
         case TOKEN_LPAREN: {
-            // printf("LPAREN OH MY!\n");
-            // TODO: parse_list(parser);
+            // TODO: return parse_paren(parser);
+            push_error(ERR_TYPE_PARSER, ERR_UNIMPLEMENTED, tok.line, tok.col);
+            return (Node){.type = NODE_ERR};
         } break;
         default: {
-            // printf("OH OHHHH\n");
-            // ...
+            push_error(ERR_TYPE_PARSER, ERR_UNKNOWN_TOK_TYPE, tok.line, tok.col);
+            return (Node){.type = NODE_ERR};
         } break;
     }
-    next_token(parser); // FIXME: <====
-    // TODO: this should be an error
 }
 
 void
 print_node(Node node) {
     switch (node.type) {
         case NODE_NUMBER: {
-            printf("%ld\n", node.number);
+            // u64 is printed through unsigned long long so the format
+            // specifier matches the argument type on every platform.
+            unsigned long long integral = node.number.integral;
+            unsigned long long fractional = node.number.fractional;
+            if (node.number.negative) {
+                printf("-");
+            }
+            if (fractional != 0) {
+                printf("%llu.%llu\n", integral, fractional);
+            } else {
+                printf("%llu\n", integral);
+            }
         } break;
         case NODE_STRING: {
             sv_write(&node.string);
@@ -138,13 +155,17 @@ parse(Token *tokens) {
         .tokens = tokens,
         .current = 0,
     };
+
+    // DEBUG: TOKENS
+    for (size_t i = 0; i < array_size(tokens); i++) {
+        print_token(tokens[i]);
+    }
+
     while (has_next(&parser)) {
         Node node = parse_next(&parser);
         if (node.type == NODE_ERR) {
             return;
         }
         print_node(node);
-        // Token tok = next_token(&parser);
-        // print_token(tok);
     }
 }
diff --git a/src/parser.h b/src/parser.h
index 3e016d3..21a2711 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -27,16 +27,12 @@ typedef struct Node {
     NodeType type;
 
     union {
-        // Integer numbers.
-        // u64 as_u64;
-        // u32 as_u32;
-        // u16 as_u16;
-        // u8 as_u8;
-        // s64 as_s64;
-        // s32 as_s32;
-        // s16 as_s16;
-        // s8 as_s8;
-        s64 number;
+        // Numbers.
+        struct {
+            bool negative;
+            u64 integral;
+            u64 fractional;
+        } number;
 
         // String.
         StringView string;
-- 
cgit v1.2.1