From 821eeb930dc29d4012feb1f65256835430add6e6 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 28 Jun 2024 14:07:04 +0200 Subject: Split compiler/vm files --- src/compiler.c | 261 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.c | 4 +- src/semantic.c | 4 + src/vm.c | 253 ++----------------------------------------------------- 4 files changed, 273 insertions(+), 249 deletions(-) create mode 100644 src/compiler.c diff --git a/src/compiler.c b/src/compiler.c new file mode 100644 index 0000000..d1e8a74 --- /dev/null +++ b/src/compiler.c @@ -0,0 +1,261 @@ +#ifndef COMPILER_C +#define COMPILER_C + +#include "badlib.h" + +#include "parser.c" + +typedef struct Instruction { + u8 dst; + u8 a; + u8 b; + u8 op; +} Instruction; + +typedef union Constant { + s64 i; + u64 u; + double f; + ptrsize ptr; +} Constant; + +typedef struct Chunk { + Instruction *code; + + // Constant values that fit in 64 bits. + Constant *constants; + IntIntMap *intmap; + sz const_idx; + + // Constant strings. + Str *strings; + StrIntMap *strmap; + sz str_idx; + + // Number of registers currently used in this chunk. + sz reg_idx; + + // Debugging. + Str file_name; + Arena *storage; + // TODO: line/col info for debugging. +} Chunk; + +typedef enum OpCode { + // OP DST A B + // --------------------------------------------------------------- + // VM/high level instructions. + OP_HALT, // halt + // Load/Store instructions. + OP_LD8K, // ld8k rx, ca -> u8 rx = ca + OP_LD16K, // ld16k rx, ca -> u16 rx = ca + OP_LD32K, // ld32k rx, ca -> u32 rx = ca + OP_LD64K, // ld64k rx, ca -> u64 rx = ca + OP_LD8I, // ld8i rx, ra, cb -> u8 *p; rx = p[ra + cb] + OP_LD16I, // ld16i rx, ra, cb -> u16 *p; rx = p[ra + cb] + OP_LD32I, // ld32i rx, ra, cb -> u32 *p; rx = p[ra + cb] + OP_LD64I, // ld64i rx, ra, cb -> u64 *p; rx = p[ra + cb] + OP_LD8, // ld8 rx, ra, rb -> u8 *p; rx = p[ra + rb] + OP_LD16, // ld16 rx, ra, rb -> u16 *p; rx = p[ra + rb] + OP_LD32, // ld32 rx, ra, rb -> u32 *p; rx = p[ra + rb] + OP_LD64, // ld64 rx, ra, rb -> u64 *p; rx = p[ra + rb] + OP_ST8I, // st8i rx, ra, cb -> u8 *p; p[ra + cb] = rx + OP_ST16I, // st16i rx, ra, cb -> u16 *p; p[ra + cb] = rx + OP_ST32I, // st32i rx, ra, cb -> u32 *p; p[ra + cb] = rx + OP_ST64I, // st64i rx, ra, cb -> u64 *p; p[ra + cb] = rx + OP_ST8, // st8 rx, ra, rb -> u8 *p; p[ra + rb] = rx + OP_ST16, // st16 rx, ra, rb -> u16 *p; p[ra + rb] = rx + OP_ST32, // st32 rx, ra, rb -> u32 *p; p[ra + rb] = rx + OP_ST64, // st64 rx, ra, rb -> u64 *p; p[ra + rb] = rx + // Integer arithmetic (only int/s64 for now). + OP_ADDI, // addk rx, ra, cb + OP_SUBI, // subk rx, ra, cb + OP_MULI, // mulk rx, ra, cb + OP_DIVI, // divk rx, ra, cb + OP_MODI, // modk rx, ra, cb + OP_ADD, // add rx, ra, rb + OP_SUB, // sub rx, ra, rb + OP_MUL, // mul rx, ra, rb + OP_DIV, // div rx, ra, rb + OP_MOD, // mod rx, ra, rb + // Floating point arithmetic (only f64 for now). + OP_ADDFI, // addfk rx, ra, cb + OP_SUBFI, // subfk rx, ra, cb + OP_MULFI, // mulfk rx, ra, cb + OP_DIVFI, // divfk rx, ra, cb + OP_MODFI, // modfk rx, ra, cb + OP_ADDF, // addf rx, ra, rb + OP_SUBF, // subf rx, ra, rb + OP_MULF, // mulf rx, ra, rb + OP_DIVF, // divf rx, ra, rb + OP_MODF, // modf rx, ra, rb + // Register-to-register copy. + OP_MOV8, // mov8 rx, ra -> rx = ra & 0xFF + OP_MOV16, // mov16 rx, ra -> rx = ra & 0xFFFF + OP_MOV32, // mov32 rx, ra -> rx = ra & 0xFFFFFFFF + OP_MOV64, // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF +} OpCode; + +Str op_str[] = { + [OP_HALT] = cstr("HALT "), + // Load ops. + [OP_LD8K] = cstr("LD8K "), + [OP_LD16K] = cstr("LD16K "), + [OP_LD32K] = cstr("LD32K "), + [OP_LD64K] = cstr("LD64K "), + [OP_LD8I] = cstr("LD8I "), + [OP_LD16I] = cstr("LD16I "), + [OP_LD32I] = cstr("LD32I "), + [OP_LD64I] = cstr("LD64I "), + [OP_LD8] = cstr("LD8 "), + [OP_LD16] = cstr("LD16 "), + [OP_LD32] = cstr("LD32 "), + [OP_LD64] = cstr("LD64 "), + // Store ops. + [OP_ST8I] = cstr("ST8I "), + [OP_ST16I] = cstr("ST16I "), + [OP_ST32I] = cstr("ST32I "), + [OP_ST64I] = cstr("ST64I "), + [OP_ST8] = cstr("ST8 "), + [OP_ST16] = cstr("ST16 "), + [OP_ST32] = cstr("ST32 "), + [OP_ST64] = cstr("ST64 "), + // Arithmetic. + [OP_ADDI] = cstr("ADDI "), + [OP_SUBI] = cstr("SUBI "), + [OP_MULI] = cstr("MULI "), + [OP_DIVI] = cstr("DIVI "), + [OP_MODI] = cstr("MODI "), + [OP_ADD] = cstr("ADD "), + [OP_SUB] = cstr("SUB "), + [OP_MUL] = cstr("MUL "), + [OP_DIV] = cstr("DIV "), + [OP_MOD] = cstr("MOD "), + [OP_ADDFI] = cstr("ADDFI "), + [OP_SUBFI] = cstr("SUBFI "), + [OP_MULFI] = cstr("MULFI "), + [OP_DIVFI] = cstr("DIVFI "), + [OP_MODFI] = cstr("MODFI "), + [OP_ADDF] = cstr("ADDF "), + [OP_SUBF] = cstr("SUBF "), + [OP_MULF] = cstr("MULF "), + [OP_DIVF] = cstr("DIVF "), + // Reg copy/move. + [OP_MODF] = cstr("MODF "), + [OP_MOV8] = cstr("MOV8 "), + [OP_MOV16] = cstr("MOV16 "), + [OP_MOV32] = cstr("MOV32 "), + [OP_MOV64] = cstr("MOV64 "), +}; + +typedef enum { + COMP_CONST, + COMP_STRING, + COMP_REG, + COMP_ERR, +} CompResultType; + +typedef struct CompResult { + sz idx; + CompResultType type; +} CompResult; + +CompResult compile_expr(Chunk *chunk, Node *node); + +#define EMIT_OP(OP, DST, A, B, NODE, CHUNK) \ + do { \ + Instruction inst = (Instruction){ \ + .op = (OP), \ + .dst = (DST), \ + .a = (A), \ + .b = (B), \ + }; \ + array_push((CHUNK)->code, inst, (CHUNK)->storage); \ + } while (0) + +CompResult +compile_binary(OpCode op, Chunk *chunk, Node *node) { + CompResult comp_a = compile_expr(chunk, node->left); + CompResult comp_b = compile_expr(chunk, node->right); + sz reg_a; + sz reg_b; + switch (comp_a.type) { + case COMP_CONST: { + reg_a = chunk->reg_idx++; + EMIT_OP(OP_LD64K, reg_a, comp_a.idx, 0, node, chunk); + } break; + case COMP_REG: { + reg_a = comp_a.idx; + } break; + default: { + return (CompResult){.type = COMP_ERR}; + } break; + } + switch (comp_b.type) { + case COMP_CONST: { + reg_b = chunk->reg_idx++; + EMIT_OP(OP_LD64K, reg_b, comp_b.idx, 0, node, chunk); + } break; + case COMP_REG: { + reg_b = comp_b.idx; + } break; + default: { + return (CompResult){.type = COMP_ERR}; + } break; + } + sz reg_dst = comp_a.idx; // Less registers + // sz reg_dst = chunk->reg_idx++; // Better for optimization + EMIT_OP(op, reg_dst, reg_a, reg_b, node, chunk); + return (CompResult){.type = COMP_REG, .idx = reg_dst}; +} + +CompResult +compile_expr(Chunk *chunk, Node *node) { + switch (node->kind) { + case NODE_ADD: return compile_binary(OP_ADD, chunk, node); break; + case NODE_SUB: return compile_binary(OP_SUB, chunk, node); break; + case NODE_MUL: return compile_binary(OP_MUL, chunk, node); break; + case NODE_DIV: return compile_binary(OP_DIV, chunk, node); break; + case NODE_MOD: return compile_binary(OP_MOD, chunk, node); break; + case NODE_TRUE: + case NODE_FALSE: + case NODE_NUM_FLOAT: + case NODE_NUM_INT: { + sz value = node->value.i; + // Make sure we don't have duplicated constants. + IntIntMap *map = intintmap_lookup(&chunk->intmap, value); + if (!map) { + map = intintmap_insert(&chunk->intmap, value, + chunk->const_idx++, chunk->storage); + Constant c = (Constant){.i = node->value.i}; + array_push(chunk->constants, c, chunk->storage); + } + return (CompResult){ + .type = COMP_CONST, + .idx = map->val, + }; + } break; + case NODE_STRING: { + Str string = node->value.str; + // Make sure we don't have duplicated strings. + StrIntMap *map = strintmap_lookup(&chunk->strmap, string); + if (!map) { + map = strintmap_insert(&chunk->strmap, string, chunk->str_idx++, + chunk->storage); + array_push(chunk->strings, string, chunk->storage); + } + return (CompResult){ + .type = COMP_STRING, + .idx = map->val, + }; + } break; + default: { + eprintln("error: compilation not implemented for node %s", + node_str[node->kind]); + exit(EXIT_FAILURE); + } break; + } + return (CompResult){.type = COMP_ERR}; +} + +#endif // COMPILER_C diff --git a/src/main.c b/src/main.c index fdbf4e0..c52ec74 100644 --- a/src/main.c +++ b/src/main.c @@ -3,6 +3,7 @@ #include #include "badlib.h" +#include "compiler.c" #include "lexer.c" #include "parser.c" #include "semantic.c" @@ -193,6 +194,8 @@ process_file(Str path) { Instruction halt = (Instruction){.op = OP_HALT, .dst = res_reg}; array_push(chunk.code, halt, &bytecode_arena); + disassemble_chunk(chunk); + // Run bytecode on VM. VM vm = {0}; vm_init(&vm, &chunk); @@ -201,7 +204,6 @@ process_file(Str path) { vm_run(&vm); // println("VM REGISTERS AFTER:\n%{Mem}", // &(Array){.mem = (u8 *)&vm.regs, sizeof(vm.regs)}); - disassemble_chunk(chunk); #if DEBUG == 1 println("Space used: %{Arena}", &lexer_arena); diff --git a/src/semantic.c b/src/semantic.c index 428cc53..7158052 100644 --- a/src/semantic.c +++ b/src/semantic.c @@ -1,3 +1,6 @@ +#ifndef SEMANTIC_C +#define SEMANTIC_C + #include "badlib.h" typedef enum { @@ -1232,3 +1235,4 @@ symbolic_analysis(Analyzer *a, Parser *parser) { } } +#endif // SEMANTIC_C diff --git a/src/vm.c b/src/vm.c index cb1b6cd..8d4e67e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,143 +1,8 @@ -#include "badlib.h" -#include "parser.c" - -typedef struct Instruction { - u8 dst; - u8 a; - u8 b; - u8 op; -} Instruction; - -typedef union Constant { - s64 i; - u64 u; - double f; - ptrsize ptr; -} Constant; - -typedef struct Chunk { - Instruction *code; - - // Constant values that fit in 64 bits. - Constant *constants; - IntIntMap *intmap; - sz const_idx; - - // Constant strings. - Str *strings; - StrIntMap *strmap; - sz str_idx; +#ifndef VM_C +#define VM_C - // Number of registers currently used in this chunk. - sz reg_idx; - - // Debugging. - Str file_name; - Arena *storage; - // TODO: line/col info for debugging. -} Chunk; - -typedef enum OpCode { - // OP DST A B - // --------------------------------------------------------------- - OP_HALT, // halt - OP_LD8K, // ld8k rx, ca -> u8 rx = ca - OP_LD16K, // ld16k rx, ca -> u16 rx = ca - OP_LD32K, // ld32k rx, ca -> u32 rx = ca - OP_LD64K, // ld64k rx, ca -> u64 rx = ca - OP_LD8I, // ld8i rx, ra, cb -> u8 *p; rx = p[ra + cb] - OP_LD16I, // ld16i rx, ra, cb -> u16 *p; rx = p[ra + cb] - OP_LD32I, // ld32i rx, ra, cb -> u32 *p; rx = p[ra + cb] - OP_LD64I, // ld64i rx, ra, cb -> u64 *p; rx = p[ra + cb] - OP_LD8, // ld8 rx, ra, rb -> u8 *p; rx = p[ra + rb] - OP_LD16, // ld16 rx, ra, rb -> u16 *p; rx = p[ra + rb] - OP_LD32, // ld32 rx, ra, rb -> u32 *p; rx = p[ra + rb] - OP_LD64, // ld64 rx, ra, rb -> u64 *p; rx = p[ra + rb] - OP_ST8I, // st8i rx, ra, cb -> u8 *p; p[ra + cb] = rx - OP_ST16I, // st16i rx, ra, cb -> u16 *p; p[ra + cb] = rx - OP_ST32I, // st32i rx, ra, cb -> u32 *p; p[ra + cb] = rx - OP_ST64I, // st64i rx, ra, cb -> u64 *p; p[ra + cb] = rx - OP_ST8, // st8 rx, ra, rb -> u8 *p; p[ra + rb] = rx - OP_ST16, // st16 rx, ra, rb -> u16 *p; p[ra + rb] = rx - OP_ST32, // st32 rx, ra, rb -> u32 *p; p[ra + rb] = rx - OP_ST64, // st64 rx, ra, rb -> u64 *p; p[ra + rb] = rx - OP_ADDI, // addk rx, ra, cb - OP_SUBI, // subk rx, ra, cb - OP_MULI, // mulk rx, ra, cb - OP_DIVI, // divk rx, ra, cb - OP_MODI, // modk rx, ra, cb - OP_ADD, // add rx, ra, rb - OP_SUB, // sub rx, ra, rb - OP_MUL, // mul rx, ra, rb - OP_DIV, // div rx, ra, rb - OP_MOD, // mod rx, ra, rb - OP_ADDFI, // addk rx, ra, cb - OP_SUBFI, // subk rx, ra, cb - OP_MULFI, // mulk rx, ra, cb - OP_DIVFI, // divk rx, ra, cb - OP_MODFI, // modk rx, ra, cb - OP_ADDF, // add rx, ra, rb - OP_SUBF, // sub rx, ra, rb - OP_MULF, // mul rx, ra, rb - OP_DIVF, // div rx, ra, rb - OP_MODF, // mod rx, ra, rb - OP_MOV8, // mov8 rx, ra -> rx = ra & 0xFF - OP_MOV16, // mov16 rx, ra -> rx = ra & 0xFFFF - OP_MOV32, // mov32 rx, ra -> rx = ra & 0xFFFFFFFF - OP_MOV64, // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF -} OpCode; - -Str op_str[] = { - [OP_HALT] = cstr("HALT "), - // Load ops. - [OP_LD8K] = cstr("LD8K "), - [OP_LD16K] = cstr("LD16K "), - [OP_LD32K] = cstr("LD32K "), - [OP_LD64K] = cstr("LD64K "), - [OP_LD8I] = cstr("LD8I "), - [OP_LD16I] = cstr("LD16I "), - [OP_LD32I] = cstr("LD32I "), - [OP_LD64I] = cstr("LD64I "), - [OP_LD8] = cstr("LD8 "), - [OP_LD16] = cstr("LD16 "), - [OP_LD32] = cstr("LD32 "), - [OP_LD64] = cstr("LD64 "), - // Store ops. - [OP_ST8I] = cstr("ST8I "), - [OP_ST16I] = cstr("ST16I "), - [OP_ST32I] = cstr("ST32I "), - [OP_ST64I] = cstr("ST64I "), - [OP_ST8] = cstr("ST8 "), - [OP_ST16] = cstr("ST16 "), - [OP_ST32] = cstr("ST32 "), - [OP_ST64] = cstr("ST64 "), - // Arithmetic. - [OP_ADDI] = cstr("ADDI "), - [OP_SUBI] = cstr("SUBI "), - [OP_MULI] = cstr("MULI "), - [OP_DIVI] = cstr("DIVI "), - [OP_MODI] = cstr("MODI "), - [OP_ADD] = cstr("ADD "), - [OP_SUB] = cstr("SUB "), - [OP_MUL] = cstr("MUL "), - [OP_DIV] = cstr("DIV "), - [OP_MOD] = cstr("MOD "), - [OP_ADDFI] = cstr("ADDFI "), - [OP_SUBFI] = cstr("SUBFI "), - [OP_MULFI] = cstr("MULFI "), - [OP_DIVFI] = cstr("DIVFI "), - [OP_MODFI] = cstr("MODFI "), - [OP_ADDF] = cstr("ADDF "), - [OP_SUBF] = cstr("SUBF "), - [OP_MULF] = cstr("MULF "), - [OP_DIVF] = cstr("DIVF "), - // Reg copy/move. - [OP_MODF] = cstr("MODF "), - [OP_MOV8] = cstr("MOV8 "), - [OP_MOV16] = cstr("MOV16 "), - [OP_MOV32] = cstr("MOV32 "), - [OP_MOV64] = cstr("MOV64 "), -}; +#include "badlib.h" +#include "compiler.c" void disassemble_instruction(Instruction instruction) { @@ -308,112 +173,4 @@ vm_run(VM *vm) { } } -typedef enum { - COMP_CONST, - COMP_STRING, - COMP_REG, - COMP_ERR, -} CompResultType; - -typedef struct CompResult { - sz idx; - CompResultType type; -} CompResult; - -CompResult compile_expr(Chunk *chunk, Node *node); - -#define EMIT_OP(OP, DST, A, B, NODE, CHUNK) \ - do { \ - Instruction inst = (Instruction){ \ - .op = (OP), \ - .dst = (DST), \ - .a = (A), \ - .b = (B), \ - }; \ - array_push((CHUNK)->code, inst, (CHUNK)->storage); \ - } while (0) - -CompResult -compile_binary(OpCode op, Chunk *chunk, Node *node) { - CompResult comp_a = compile_expr(chunk, node->left); - CompResult comp_b = compile_expr(chunk, node->right); - sz reg_a; - sz reg_b; - switch (comp_a.type) { - case COMP_CONST: { - reg_a = chunk->reg_idx++; - EMIT_OP(OP_LD64K, reg_a, comp_a.idx, 0, node, chunk); - } break; - case COMP_REG: { - reg_a = comp_a.idx; - } break; - default: { - return (CompResult){.type = COMP_ERR}; - } break; - } - switch (comp_b.type) { - case COMP_CONST: { - reg_b = chunk->reg_idx++; - EMIT_OP(OP_LD64K, reg_b, comp_b.idx, 0, node, chunk); - } break; - case COMP_REG: { - reg_b = comp_b.idx; - } break; - default: { - return (CompResult){.type = COMP_ERR}; - } break; - } - sz reg_dst = comp_a.idx; // Less registers - // sz reg_dst = chunk->reg_idx++; // Better for optimization - EMIT_OP(op, reg_dst, reg_a, reg_b, node, chunk); - return (CompResult){.type = COMP_REG, .idx = reg_dst}; -} - -CompResult -compile_expr(Chunk *chunk, Node *node) { - switch (node->kind) { - case NODE_ADD: return compile_binary(OP_ADD, chunk, node); break; - case NODE_SUB: return compile_binary(OP_SUB, chunk, node); break; - case NODE_MUL: return compile_binary(OP_MUL, chunk, node); break; - case NODE_DIV: return compile_binary(OP_DIV, chunk, node); break; - case NODE_MOD: return compile_binary(OP_MOD, chunk, node); break; - case NODE_TRUE: - case NODE_FALSE: - case NODE_NUM_FLOAT: - case NODE_NUM_INT: { - sz value = node->value.i; - // Make sure we don't have duplicated constants. - IntIntMap *map = intintmap_lookup(&chunk->intmap, value); - if (!map) { - map = intintmap_insert(&chunk->intmap, value, - chunk->const_idx++, chunk->storage); - Constant c = (Constant){.i = node->value.i}; - array_push(chunk->constants, c, chunk->storage); - } - return (CompResult){ - .type = COMP_CONST, - .idx = map->val, - }; - } break; - case NODE_STRING: { - Str string = node->value.str; - // Make sure we don't have duplicated strings. - StrIntMap *map = strintmap_lookup(&chunk->strmap, string); - if (!map) { - map = strintmap_insert(&chunk->strmap, string, chunk->str_idx++, - chunk->storage); - array_push(chunk->strings, string, chunk->storage); - } - return (CompResult){ - .type = COMP_STRING, - .idx = map->val, - }; - } break; - default: { - eprintln("error: compilation not implemented for node %s", - node_str[node->kind]); - exit(EXIT_FAILURE); - } break; - } - return (CompResult){.type = COMP_ERR}; -} +#endif // VM_C -- cgit v1.2.1