From 928a58ad7977c0e2e445fd9c16db5726cda789e0 Mon Sep 17 00:00:00 2001
From: Bad Diode
Date: Sat, 23 Oct 2021 13:01:10 +0200
Subject: Add (buggy) initial compilation of arithmetic ops

---
 src/bytecode/chunk.h       |   5 ++
 src/bytecode/compiler.h    | 220 +++++++++++++++++++++++++++++++++++++++++++--
 src/bytecode/darray.h      |   0
 src/bytecode/debug.h       |   0
 src/bytecode/errors.c      |   0
 src/bytecode/errors.h      |   0
 src/bytecode/lexer.c       |   4 +-
 src/bytecode/lexer.h       |   2 +-
 src/bytecode/main.c        |   0
 src/bytecode/objects.h     |   0
 src/bytecode/ops.h         |   0
 src/bytecode/read_line.c   |   0
 src/bytecode/read_line.h   |   0
 src/bytecode/string_view.c |   0
 src/bytecode/string_view.h |   0
 src/bytecode/types.h       |   0
 src/bytecode/vm.h          |   4 +-
 17 files changed, 223 insertions(+), 12 deletions(-)
 mode change 100644 => 100755 src/bytecode/chunk.h
 mode change 100644 => 100755 src/bytecode/compiler.h
 mode change 100644 => 100755 src/bytecode/darray.h
 mode change 100644 => 100755 src/bytecode/debug.h
 mode change 100644 => 100755 src/bytecode/errors.c
 mode change 100644 => 100755 src/bytecode/errors.h
 mode change 100644 => 100755 src/bytecode/lexer.c
 mode change 100644 => 100755 src/bytecode/lexer.h
 mode change 100644 => 100755 src/bytecode/main.c
 mode change 100644 => 100755 src/bytecode/objects.h
 mode change 100644 => 100755 src/bytecode/ops.h
 mode change 100644 => 100755 src/bytecode/read_line.c
 mode change 100644 => 100755 src/bytecode/read_line.h
 mode change 100644 => 100755 src/bytecode/string_view.c
 mode change 100644 => 100755 src/bytecode/string_view.h
 mode change 100644 => 100755 src/bytecode/types.h
 mode change 100644 => 100755 src/bytecode/vm.h

(limited to 'src/bytecode')

diff --git a/src/bytecode/chunk.h b/src/bytecode/chunk.h
old mode 100644
new mode 100755
index e3c7383..81fc4cc
--- a/src/bytecode/chunk.h
+++ b/src/bytecode/chunk.h
@@ -46,6 +46,11 @@ add_code(Chunk *chunk, u8 byte, size_t line, size_t col) {
 
 size_t
 add_constant(Chunk *chunk, Object obj) {
+    // FIXME?: Since we are using a single byte to store constant indices, we
+    // can only have 256 stored constants. If we need more we may need to add
+    // another instruction OP_CONSTANT_16 to have at least two bytes for
+    // constants. Alternatively, we could make that the default. Either way, for
+    // now it's fine.
     size_t pos = array_size(chunk->constants);
     array_push(chunk->constants, obj);
     return pos;
diff --git a/src/bytecode/compiler.h b/src/bytecode/compiler.h
old mode 100644
new mode 100755
index 6991a86..7ec1ca9
--- a/src/bytecode/compiler.h
+++ b/src/bytecode/compiler.h
@@ -31,21 +31,227 @@ has_next_token(const Visitor *visitor) {
     return visitor->current < array_size(visitor->tokens);
 }
 
+// Store `obj` in the chunk's constant table and emit OP_CONSTANT followed by
+// the index of the new constant, both tagged with the position of `tok`.
+void
+emit_constant(Chunk *chunk, Token tok, Object obj) {
+    // TODO: Should we deduplicate constants? For example why store a number
+    // more than once instead of reusing the existing index?
+    // NOTE: add_code takes a u8 byte, so an index above 255 is truncated here.
+    size_t num_idx = add_constant(chunk, obj);
+    add_code(chunk, OP_CONSTANT, tok.line, tok.column);
+    add_code(chunk, num_idx, tok.line, tok.column);
+}
+
+// Convert the decimal digits of a fixnum token to an integer and emit it as
+// a constant. A '-' character in the token makes the emitted value negative.
+void
+parse_fixnum(Chunk *chunk, Token tok) {
+    ssize_t num = 0;
+    int sign = 1;
+    for (size_t i = 0; i < tok.value.n; i++) {
+        char c = tok.value.start[i];
+        if (c == '-') {
+            sign = -1;
+            continue;
+        }
+        num = num * 10 + (c - '0');
+    }
+    emit_constant(chunk, tok, sign * num);
+}
+
+void parse_tree(Chunk *chunk, Visitor *vs);
+
+// Compile the operands of an arithmetic form whose operator token `op_tok`
+// was already consumed. An initial constant is emitted first and the
+// operation is emitted after every operand, until the closing parenthesis.
+void
+compile_list_primitive(Chunk *chunk, Visitor *vs, Token op_tok) {
+    Ops op;
+    switch (op_tok.type) {
+        case TOKEN_ADD: {
+            emit_constant(chunk, op_tok, 0);
+            op = OP_SUM;
+        } break;
+        case TOKEN_SUB: {
+            // TODO: fetch first element.
+            emit_constant(chunk, op_tok, 0);
+            op = OP_SUB;
+        } break;
+        case TOKEN_MUL: {
+            emit_constant(chunk, op_tok, 1);
+            op = OP_MUL;
+        } break;
+        case TOKEN_DIV: {
+            // TODO: fetch first element.
+            emit_constant(chunk, op_tok, 1);
+            op = OP_DIV;
+        } break;
+        case TOKEN_MOD: {
+            // TODO: fetch first element.
+            emit_constant(chunk, op_tok, 1);
+            op = OP_MOD;
+        } break;
+        default: {
+            // Not an arithmetic token: bail out so that `op` is never used
+            // uninitialized below.
+            return;
+        } break;
+    }
+    while (has_next_token(vs)) {
+        Token tok = peek_token(vs);
+        if (tok.type == TOKEN_EOF) {
+            error_push((Error){
+                .type = ERR_TYPE_PARSER,
+                .value = ERR_UNBALANCED_PAREN,
+                .line = op_tok.line,
+                .col = op_tok.column,
+            });
+            return;
+        }
+        if (tok.type == TOKEN_RPAREN) {
+            next_token(vs);
+            break;
+        }
+        parse_tree(chunk, vs);
+        add_code(chunk, op, tok.line, tok.column);
+    }
+}
+
+// Compile a list form; the opening parenthesis was consumed by the caller.
+// Only the arithmetic primitives are accepted in operator position for now.
+void
+parse_list(Chunk *chunk, Visitor *vs) {
+    if (has_next_token(vs)) {
+        Token tok = next_token(vs);
+        print_token(tok);
+        // TODO: check if is function call.
+        switch (tok.type) {
+            case TOKEN_ADD:
+            case TOKEN_SUB:
+            case TOKEN_MUL:
+            case TOKEN_DIV:
+            case TOKEN_MOD:{
+                compile_list_primitive(chunk, vs, tok);
+            } break;
+            default: {
+                error_push((Error){
+                    .type = ERR_TYPE_COMPILER,
+                    .value = ERR_OBJ_NOT_CALLABLE,
+                    .line = tok.line,
+                    .col = tok.column,
+                });
+            } break;
+        }
+    }
+}
+
+// Compile the next expression from the token stream. Only fixnums and lists
+// emit bytecode for now; a stray ')' or an unexpected token pushes a parser
+// error and the remaining token types are consumed and ignored.
+void
+parse_tree(Chunk *chunk, Visitor *vs) {
+    Token tok = next_token(vs);
+    switch (tok.type) {
+        case TOKEN_FIXNUM: {
+            parse_fixnum(chunk, tok);
+            return ;
+        } break;
+        case TOKEN_TRUE: {
+            // return obj_true;
+            return;
+        } break;
+        case TOKEN_FALSE: {
+            // return obj_false;
+            return;
+        } break;
+        case TOKEN_RPAREN: {
+            error_push((Error){
+                .type = ERR_TYPE_PARSER,
+                .value = ERR_UNBALANCED_PAREN,
+                .line = tok.line,
+                .col = tok.column,
+            });
+            return;
+        } break;
+        case TOKEN_QUOTE: {
+            // Object *base = make_pair(obj_quote, obj_nil);
+            // base->cdr = make_pair(obj_nil, obj_nil);
+            // push_root(base);
+            // Object *next_obj = parse_tree(vs);
+            // if (next_obj == obj_err) {
+            //     return obj_err;
+            // }
+            // base->cdr->car = next_obj;
+            // return base;
+            return;
+        } break;
+        case TOKEN_LPAREN: {
+            parse_list(chunk, vs);
+            // Object *obj = parse_list(vs);
+            // if (obj == obj_err) {
+            //     error_push((Error){
+            //         .type = ERR_TYPE_PARSER,
+            //         .value = ERR_UNBALANCED_PAREN,
+            //         .line = tok.line,
+            //         .col = tok.column,
+            //     });
+            // }
+            // return obj;
+            return;
+        } break;
+        case TOKEN_STRING: {
+            // Object *obj = make_string();
+            // push_root(obj);
+            // append_string(obj, tok.value);
+            // return obj;
+            return;
+        } break;
+        case TOKEN_SYMBOL: {
+            // Object *obj = make_symbol(tok.value);
+            // push_root(obj);
+            // return obj;
+            return;
+        } break;
+        case TOKEN_NIL: {
+            // return obj_nil;
+            return;
+        } break;
+        default: {
+            break;
+        } break;
+    }
+    error_push((Error){
+        .type = ERR_TYPE_PARSER,
+        .value = ERR_EOF_REACHED,
+        .line = tok.line,
+        .col = tok.column,
+    });
+    return;
+}
+
 Chunk *
 compile(Token *tokens) {
     Chunk *chunk = NULL;
     chunk = chunk_init();
+    Visitor visitor = (Visitor){
+        .tokens = tokens,
+        .current = 0,
+    };
+    while (has_next_token(&visitor) && peek_token(&visitor).type != TOKEN_EOF) {
+        parse_tree(chunk, &visitor);
+    }
     // error_push((Error){
     //     .type = ERR_TYPE_COMPILER,
     //     .value = ERR_UNKNOWN,
     // });
-    size_t const_a = add_constant(chunk, 7);
-    add_code(chunk, OP_CONSTANT, 1, 1);
-    add_code(chunk, const_a, 1, 1);
-    size_t const_b = add_constant(chunk, 2);
-    add_code(chunk, OP_CONSTANT, 1, 2);
-    add_code(chunk, const_b, 1, 2);
-    add_code(chunk, OP_MOD, 1, 3);
+    // size_t const_a = add_constant(chunk, 7);
+    // add_code(chunk, OP_CONSTANT, 1, 1);
+    // add_code(chunk, const_a, 1, 1);
+    // size_t const_b = add_constant(chunk, 2);
+    // add_code(chunk, OP_CONSTANT, 1, 2);
+    // add_code(chunk, const_b, 1, 2);
+    // add_code(chunk, OP_MOD, 1, 3);
     add_code(chunk, OP_RETURN, 1, 1);
     return chunk;
 }
diff --git a/src/bytecode/darray.h b/src/bytecode/darray.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/debug.h b/src/bytecode/debug.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/errors.c b/src/bytecode/errors.c
old mode 100644
new mode 100755
diff --git a/src/bytecode/errors.h b/src/bytecode/errors.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/lexer.c b/src/bytecode/lexer.c
old mode 100644
new mode 100755
index 82cdf22..bc2dd9d
--- a/src/bytecode/lexer.c
+++ b/src/bytecode/lexer.c
@@ -17,7 +17,7 @@ static const char* token_str[] = {
     [TOKEN_FUN] = "TOKEN_FUN",
     [TOKEN_LAMBDA] = "TOKEN_LAMBDA",
     [TOKEN_ADD] = "TOKEN_ADD",
-    [TOKEN_MIN] = "TOKEN_MIN",
+    [TOKEN_SUB] = "TOKEN_SUB",
     [TOKEN_MUL] = "TOKEN_MUL",
     [TOKEN_DIV] = "TOKEN_DIV",
     [TOKEN_MOD] = "TOKEN_MOD",
@@ -147,7 +147,7 @@ find_primitive_type(const StringView value) {
     if (TOKEN_IS_KEYWORD(value, "fun")) { return TOKEN_FUN; }
     if (TOKEN_IS_KEYWORD(value, "lambda")) { return TOKEN_LAMBDA; }
     if (TOKEN_IS_KEYWORD(value, "+")) { return TOKEN_ADD; }
-    if (TOKEN_IS_KEYWORD(value, "-")) { return TOKEN_MIN; }
+    if (TOKEN_IS_KEYWORD(value, "-")) { return TOKEN_SUB; }
     if (TOKEN_IS_KEYWORD(value, "*")) { return TOKEN_MUL; }
     if (TOKEN_IS_KEYWORD(value, "/")) { return TOKEN_DIV; }
     if (TOKEN_IS_KEYWORD(value, "%")) { return TOKEN_MOD; }
diff --git a/src/bytecode/lexer.h b/src/bytecode/lexer.h
old mode 100644
new mode 100755
index a19ac11..47fd384
--- a/src/bytecode/lexer.h
+++ b/src/bytecode/lexer.h
@@ -28,7 +28,7 @@ typedef enum TokenType {
 
     // Arithmetic.
     TOKEN_ADD,
-    TOKEN_MIN,
+    TOKEN_SUB,
     TOKEN_MUL,
     TOKEN_DIV,
     TOKEN_MOD,
diff --git a/src/bytecode/main.c b/src/bytecode/main.c
old mode 100644
new mode 100755
diff --git a/src/bytecode/objects.h b/src/bytecode/objects.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/ops.h b/src/bytecode/ops.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/read_line.c b/src/bytecode/read_line.c
old mode 100644
new mode 100755
diff --git a/src/bytecode/read_line.h b/src/bytecode/read_line.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/string_view.c b/src/bytecode/string_view.c
old mode 100644
new mode 100755
diff --git a/src/bytecode/string_view.h b/src/bytecode/string_view.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/types.h b/src/bytecode/types.h
old mode 100644
new mode 100755
diff --git a/src/bytecode/vm.h b/src/bytecode/vm.h
old mode 100644
new mode 100755
index f9e64d1..3a9b5af
--- a/src/bytecode/vm.h
+++ b/src/bytecode/vm.h
@@ -81,7 +81,7 @@ vm_interpret(VM vm, Chunk *chunk) {
             case OP_SUB: {
                 Object a = array_pop(vm.stack);
                 Object b = array_pop(vm.stack);
-                array_push(vm.stack, a - b);
+                array_push(vm.stack, b - a);
             } break;
             case OP_MUL: {
                 Object a = array_pop(vm.stack);
@@ -91,7 +91,7 @@ vm_interpret(VM vm, Chunk *chunk) {
             case OP_DIV: {
                 Object a = array_pop(vm.stack);
                 Object b = array_pop(vm.stack);
-                array_push(vm.stack, a / b);
+                array_push(vm.stack, b / a);
             } break;
             case OP_MOD: {
                 Object a = array_pop(vm.stack);
-- 
cgit v1.2.1