From 2c22f17074581adfcda4cea9a419add082e6029c Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 28 Jun 2024 19:34:21 +0200 Subject: Add initial implementation of variable compilation --- src/compiler.c | 209 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/main.c | 10 ++- src/parser.c | 1 + src/semantic.c | 9 ++- src/vm.c | 136 +++++-------------------------------- 5 files changed, 240 insertions(+), 125 deletions(-) (limited to 'src') diff --git a/src/compiler.c b/src/compiler.c index e303cee..04afd5d 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -5,6 +5,15 @@ #include "parser.c" +typedef struct Variable { + Str name; + Str type; + sz size; + sz offset; +} Variable; + +MAPDEF(StrVarMap, varmap, Str, Variable, str_hash, str_eq) + typedef struct Instruction { u8 dst; u8 a; @@ -33,6 +42,11 @@ typedef struct Chunk { StrIntMap *strmap; sz str_idx; + // Global/Local variables. + Variable *vars; + StrVarMap *varmap; + sz var_off; + // Number of registers currently used in this chunk. sz reg_idx; @@ -46,7 +60,9 @@ typedef enum OpCode { // OP DST A B // --------------------------------------------------------------- // VM/high level instructions. - OP_HALT, // halt + OP_HALT, // halt + OP_STVARI, // stvari vx, ca + OP_STVAR, // stvar vx, ra // Load/Store instructions. OP_LD8K, // ld8k rx, ca -> u8 rx = ca OP_LD16K, // ld16k rx, ca -> u16 rx = ca @@ -129,6 +145,8 @@ typedef enum OpCode { Str op_str[] = { [OP_HALT] = cstr("HALT "), + [OP_STVAR] = cstr("STVAR "), + [OP_STVARI] = cstr("STVARI "), // Load ops. [OP_LD8K] = cstr("LD8K "), [OP_LD16K] = cstr("LD16K "), @@ -210,6 +228,7 @@ Str op_str[] = { }; typedef enum { + COMP_NIL, COMP_CONST, COMP_STRING, COMP_REG, @@ -464,6 +483,54 @@ compile_expr(Chunk *chunk, Node *node) { .idx = map->val, }; } break; + case NODE_LET: { + sz idx = array_size(chunk->vars); + Str name = node->unique_name; + Str type = node->var_name->type; + sz size = 8; + // TODO: get type storage from a table to consider all the basic + // types as well as user defined ones. + if (str_eq(type, cstr("str"))) { + size = 16; + } + Variable var = (Variable){ + .name = name, + .type = type, + .size = size, + .offset = chunk->var_off, + }; + varmap_insert(&chunk->varmap, name, var, chunk->storage); + array_push(chunk->vars, var, chunk->storage); + chunk->var_off += size; + + // Value. + if (node->var_val) { + CompResult res = compile_expr(chunk, node->var_val); + switch (res.type) { + case COMP_CONST: { + EMIT_OP(OP_STVARI, idx, res.idx, 0, node->var_val, + chunk); + } break; + case COMP_REG: { + EMIT_OP(OP_STVAR, idx, res.idx, 0, node->var_val, + chunk); + } break; + default: { + return (CompResult){.type = COMP_ERR}; + } break; + } + } + + return (CompResult){.type = COMP_NIL}; + } break; + case NODE_BLOCK: { + CompResult res; + for (sz i = 0; i < array_size(node->elements); i++) { + Node *root = node->elements[i]; + res = compile_expr(chunk, root); + } + return res; + } break; default: { eprintln("error: compilation not implemented for node %s", node_str[node->kind]); @@ -473,4 +540,144 @@ compile_expr(Chunk *chunk, Node *node) { return (CompResult){.type = COMP_ERR}; } +void +disassemble_instruction(Instruction instruction) { + switch (instruction.op) { + case OP_MOV8: + case OP_MOV16: + case OP_MOV32: + case OP_MOV64: + println("%s r%d, r%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_LD8K: + case OP_LD16K: + case OP_LD32K: + case OP_LD64K: + println("%s r%d, c%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_LD8I: + case OP_LD16I: + case OP_LD32I: + case OP_LD64I: + case OP_ST8I: + case OP_ST16I: + case OP_ST32I: + case OP_ST64I: + case OP_ADDI: + case OP_SUBI: + case OP_MULI: + case OP_DIVI: + case OP_MODI: + case OP_ADDFI: + case OP_SUBFI: + case OP_MULFI: + case OP_DIVFI: + case OP_MODFI: + case OP_EQI: + case OP_NEQI: + case OP_LTI: + case OP_GTI: + case OP_LEI: + case OP_GEI: + case OP_ANDI: + case OP_ORI: + case OP_BITLSHIFTI: + case OP_BITRSHIFTI: + case OP_BITANDI: + case OP_BITORI: + println("%s r%d, r%d, c%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_LD8: + case OP_LD16: + case OP_LD32: + case OP_LD64: + case OP_ST8: + case OP_ST16: + case OP_ST32: + case OP_ST64: + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_MOD: + case OP_ADDF: + case OP_SUBF: + case OP_MULF: + case OP_DIVF: + case OP_MODF: + case OP_EQ: + case OP_NEQ: + case OP_LT: + case OP_GT: + case OP_LE: + case OP_GE: + case OP_AND: + case OP_OR: + case OP_BITLSHIFT: + case OP_BITRSHIFT: + case OP_BITAND: + case OP_BITOR: + println("%s r%d, r%d, r%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_STVAR: + println("%s v%d, r%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_STVARI: + println("%s v%d, c%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_BITNOTI: + case OP_NOTI: + println("%s r%d, c%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_BITNOT: + case OP_NOT: + println("%s r%d, r%d", op_str[instruction.op], instruction.dst, + instruction.a, instruction.b); + break; + case OP_HALT: println("%s", op_str[instruction.op]); break; + default: println("Unknown opcode %d", instruction.op); break; + } +} + +void +disassemble_chunk(Chunk chunk) { + println("%s: =========== code ===========", chunk.file_name); + for (sz i = 0; i < array_size(chunk.code); i++) { + print("%s: %x{4}: ", chunk.file_name, i); + disassemble_instruction(chunk.code[i]); + } + if (array_size(chunk.constants) > 0) { + println("%s: ========= constants ========", chunk.file_name); + for (sz i = 0; i < array_size(chunk.constants); i++) { + println("%s: %x{2}: %x{8}", chunk.file_name, i, + chunk.constants[i]); + } + } + if (array_size(chunk.strings) > 0) { + println("%s: ========== strings =========", chunk.file_name); + for (sz i = 0; i < array_size(chunk.strings); i++) { + println("%s: %x{2}: %s", chunk.file_name, i, chunk.strings[i]); + } + } + if (array_size(chunk.vars) > 0) { + println("%s: ========= variables ========", chunk.file_name); + for (sz i = 0; i < array_size(chunk.vars); i++) { + println("%s: %x{2}: [%x{4}:%x{4}] %s: %s", chunk.file_name, i, + chunk.vars[i].offset, + chunk.vars[i].offset + chunk.vars[i].size, + chunk.vars[i].name, chunk.vars[i].type); + } + } + println("n_regs: %d, n_vars: %d, n_strings: %d, n_consts: %d", + chunk.reg_idx, array_size(chunk.vars), chunk.str_idx, + chunk.const_idx); +} + #endif // COMPILER_C diff --git a/src/main.c b/src/main.c index 60122b9..618d325 100644 --- a/src/main.c +++ b/src/main.c @@ -176,6 +176,10 @@ process_file(Str path) { // The parser stores the root nodes as a stack. Node *root = parser.nodes[i]; res = compile_expr(&chunk, root); + if (res.type == COMP_ERR) { + eprintln("compilation error..."); + exit(EXIT_FAILURE); + } } sz res_reg = 0; switch (res.type) { @@ -215,8 +219,10 @@ process_file(Str path) { // println("VM REGISTERS BEFORE:\n%{Mem}", // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); vm_run(&vm); - // println("VM REGISTERS AFTER:\n%{Mem}", - // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); + println("VM REGISTERS AFTER:\n%{Mem}", + &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); + println("VM MEMORY AFTER:\n%{Mem}", + &(Array){.mem = (u8 *)&vm.stack, sizeof(vm.stack)}); #if DEBUG == 1 println("Space used: %{Arena}", &lexer_arena); diff --git a/src/parser.c b/src/parser.c index f7d0d41..90adaf3 100644 --- a/src/parser.c +++ b/src/parser.c @@ -197,6 +197,7 @@ typedef struct Node { Str type; Str fun_params; Str fun_return; + Str unique_name; } Node; // diff --git a/src/semantic.c b/src/semantic.c index 7158052..1b40723 100644 --- a/src/semantic.c +++ b/src/semantic.c @@ -2,6 +2,7 @@ #define SEMANTIC_C #include "badlib.h" +#include "parser.c" typedef enum { SYM_UNKNOWN, @@ -514,6 +515,10 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { a->storage); node->var_name->type = type; } + symbol = str_concat(cstr("."), symbol, a->storage); + symbol = str_concat(symbol, str_from_int(scope->id, a->storage), + a->storage); + node->unique_name = symbol; return node->type; } break; case NODE_SET: { @@ -843,7 +848,7 @@ type_inference(Analyzer *a, Node *node, Scope *scope) { node->type = cstr("str"); return node->type; } break; - case NODE_ARR_TYPE: + case NODE_ARR_TYPE: case NODE_TYPE: { SymbolMap *type = find_type(scope, node->value.str); if (!type) { @@ -1235,4 +1240,4 @@ symbolic_analysis(Analyzer *a, Parser *parser) { } } -#endif // SEMANTIC_C +#endif // SEMANTIC_C diff --git a/src/vm.c b/src/vm.c index 2771d07..556fbe9 100644 --- a/src/vm.c +++ b/src/vm.c @@ -4,130 +4,12 @@ #include "badlib.h" #include "compiler.c" -void -disassemble_instruction(Instruction instruction) { - switch (instruction.op) { - case OP_MOV8: - case OP_MOV16: - case OP_MOV32: - case OP_MOV64: - println("%s r%d, r%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_LD8K: - case OP_LD16K: - case OP_LD32K: - case OP_LD64K: - println("%s r%d, c%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_LD8I: - case OP_LD16I: - case OP_LD32I: - case OP_LD64I: - case OP_ST8I: - case OP_ST16I: - case OP_ST32I: - case OP_ST64I: - case OP_ADDI: - case OP_SUBI: - case OP_MULI: - case OP_DIVI: - case OP_MODI: - case OP_ADDFI: - case OP_SUBFI: - case OP_MULFI: - case OP_DIVFI: - case OP_MODFI: - case OP_EQI: - case OP_NEQI: - case OP_LTI: - case OP_GTI: - case OP_LEI: - case OP_GEI: - case OP_ANDI: - case OP_ORI: - case OP_BITLSHIFTI: - case OP_BITRSHIFTI: - case OP_BITANDI: - case OP_BITORI: - println("%s r%d, r%d, c%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_LD8: - case OP_LD16: - case OP_LD32: - case OP_LD64: - case OP_ST8: - case OP_ST16: - case OP_ST32: - case OP_ST64: - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_MOD: - case OP_ADDF: - case OP_SUBF: - case OP_MULF: - case OP_DIVF: - case OP_MODF: - case OP_EQ: - case OP_NEQ: - case OP_LT: - case OP_GT: - case OP_LE: - case OP_GE: - case OP_AND: - case OP_OR: - case OP_BITLSHIFT: - case OP_BITRSHIFT: - case OP_BITAND: - case OP_BITOR: - println("%s r%d, r%d, r%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_BITNOTI: - case OP_NOTI: - println("%s r%d, c%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_BITNOT: - case OP_NOT: - println("%s r%d, r%d", op_str[instruction.op], instruction.dst, - instruction.a, instruction.b); - break; - case OP_HALT: println("%s", op_str[instruction.op]); break; - default: println("Unknown opcode %d", instruction.op); break; - } -} - -void -disassemble_chunk(Chunk chunk) { - println("%s: =========== code ===========", chunk.file_name); - for (sz i = 0; i < array_size(chunk.code); i++) { - print("%s: %x{4}: ", chunk.file_name, i); - disassemble_instruction(chunk.code[i]); - } - if (array_size(chunk.constants) > 0) { - println("%s: ========= constants ========", chunk.file_name); - for (sz i = 0; i < array_size(chunk.constants); i++) { - println("%s: %x{2}: %x{8}", chunk.file_name, i, - chunk.constants[i]); - } - } - if (array_size(chunk.strings) > 0) { - println("%s: ========== strings =========", chunk.file_name); - for (sz i = 0; i < array_size(chunk.strings); i++) { - println("%s: %x{2}: %s", chunk.file_name, i, chunk.strings[i]); - } - } -} - #define N_CONST 256 +#define STACK_SIZE KB(64) typedef struct VM { Chunk *chunk; Constant regs[N_CONST]; + u8 stack[STACK_SIZE]; Instruction *ip; } VM; @@ -252,6 +134,20 @@ vm_run(VM *vm) { vm->regs[dst].f = fmod(vm->regs[src_a].f, vm->chunk->constants[src_b].f); } break; + case OP_STVAR: { + u8 dst = instruction.dst; + u8 src = instruction.a; + Variable var = vm->chunk->vars[dst]; + s64 *stack = (s64*)&vm->stack[var.offset]; + *stack = vm->regs[src].i; + } break; + case OP_STVARI: { + u8 dst = instruction.dst; + u8 src = instruction.a; + Variable var = vm->chunk->vars[dst]; + s64 *stack = (s64*)&vm->stack[var.offset]; + *stack = vm->chunk->constants[src].i; + } break; case OP_HALT: { println("VM HALT (int) -> %d", vm->regs[instruction.dst]); println("VM HALT (float) -> %f", vm->regs[instruction.dst]); -- cgit v1.2.1