#ifndef COMPILER_C
#define COMPILER_C

#include "badlib.h"
#include "parser.c"
#include "semantic.c"

typedef struct Instruction {
    u16 dst;
    u16 a;
    u16 b;
    u16 op;
} Instruction;

typedef union Constant {
    s64 i;
    u64 u;
    f64 f;
    ptrsize ptr;
} Constant;

typedef struct LineCol {
    sz line;
    sz col;
} LineCol;

typedef struct Chunk {
    sz id;
    Str name;
    Instruction *code;
    IntIntMap *labels;  // label -> chunk_index
    sz labels_idx;

    // Constant values that fit in 64 bits.
    Constant *constants;
    IntIntMap *intmap;
    sz const_idx;

    // Constant strings.
    Str *strings;
    StrIntMap *strmap;
    sz str_idx;

    // Number of registers currently used in this chunk.
    sz reg_idx;

    // Associated function metadata.
    Function fun;

    // Debugging.
    Str file_name;
    Arena *storage;
    LineCol *linecol;
} Chunk;

typedef struct Compiler {
    Str file_name;
    Arena *storage;

    // Quick search tables for base types.
    StrSet *numeric_types;
    StrSet *integer_types;
    StrSet *uint_types;
    StrSet *sint_types;
    StrSet *float_types;
    SymbolMap *symbols;

    // All the compiled chunks for this file.
    struct Chunk **chunks;

    // Destinations.
    sz lab_pre;
    sz lab_post;
    sz reg_addr;
    bool has_addr;
} Compiler;

typedef enum OpCode {
    // OP           DST A  B
    // ---------------------------------------------------------------
    // VM/high level instructions.
    OP_HALT,        // halt
    // NOTE: LDGVAR/STGVAR* could be obtained in terms of LDGADDR.
    OP_STGVARI,     // stgvari vx, ca
    OP_STGVAR,      // stgvar vx, ra
    OP_LDGVAR,      // ldgvar rx, va
    OP_LDGADDR,     // ldgaddr rx, va
    OP_STLVARI,     // stlvari vx, ca
    OP_STLVAR,      // stlvar vx, ra
    OP_LDLVAR,      // ldlvar rx, va
    OP_LDLADDR,     // ldladdr rx, va
    OP_LDFUNPTR,    // ldfunptr rx, fa ; Load the address of function fa into rx.
    OP_LDSTR,       // ldstr rx, sa ; Stores the address of the string sa into rx.
    OP_MEMCPY,      // memcpy rx, ra, rb ; memcpy(dst = rx, src = ra, rb = nbytes)
    OP_MEMCPYI,     // memcpyi rx, ra, cb

    // Functions.
    OP_CALL,        // call fx ; Call the function fx.
    OP_RECUR,       // recur fx ; Jump to fx without changing the stack.
    OP_RECUR_SELF,  // recur ; Jump to the beginning of the current fx.
    OP_RET,         // ret ; Returns from the current function.
    OP_RESERVE,     // reserve cx ; Increments the stack pointer by cx bytes.
    OP_POP,         // pop rx ; Pops the last value of the stack into rx.
    OP_PUSH,        // push rx ; Push the rx value to the stack.
    OP_PUSHI,       // pushi cx ; Push the cx value to the stack.
    OP_PUTRET,      // putret rx ; Put rx into the return value memory.
    OP_PUTRETI,     // putreti cx ; Put cx into the return value memory.

    // Printing values with builtin print/println functions.
    OP_PRINTSTR,    // p rx
    OP_PRINTSTRI,   // p cx
    OP_PRINTS8,     // p rx
    OP_PRINTS8I,    // p cx
    OP_PRINTS16,    // p rx
    OP_PRINTS16I,   // p cx
    OP_PRINTS32,    // p rx
    OP_PRINTS32I,   // p cx
    OP_PRINTS64,    // p rx
    OP_PRINTS64I,   // p cx
    OP_PRINTU8,     // p rx
    OP_PRINTU8I,    // p cx
    OP_PRINTU16,    // p rx
    OP_PRINTU16I,   // p cx
    OP_PRINTU32,    // p rx
    OP_PRINTU32I,   // p cx
    OP_PRINTU64,    // p rx
    OP_PRINTU64I,   // p cx
    OP_PRINTF64,    // p rx
    OP_PRINTF64I,   // p cx
    OP_PRINTF32,    // p rx
    OP_PRINTF32I,   // p cx
    OP_PRINTBOOL,   // p rx
    OP_PRINTBOOLI,  // p cx

    // Load/Store instructions.
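    // Naming convention for the ops below (see the per-op comments): the K
    // forms dereference the pointer in ra directly, the I forms index it by
    // the constant cb, and the bare forms index it by the register rb.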
    OP_LDCONST,     // ldconst rx, ca -> u64 rx = ca
    OP_LD8K,        // ld8k rx, ra -> u8 *p = ra; rx = *p
    OP_LD16K,       // ld16k rx, ra -> u16 *p = ra; rx = *p
    OP_LD32K,       // ld32k rx, ra -> u32 *p = ra; rx = *p
    OP_LD64K,       // ld64k rx, ra -> u64 *p = ra; rx = *p
    OP_ST8K,        // st8k rx, ra -> u8 *p = ra; *p = rx
    OP_ST16K,       // st16k rx, ra -> u16 *p = ra; *p = rx
    OP_ST32K,       // st32k rx, ra -> u32 *p = ra; *p = rx
    OP_ST64K,       // st64k rx, ra -> u64 *p = ra; *p = rx
    OP_LD8I,        // ld8i rx, ra, cb -> u8 *p = ra; rx = p[cb]
    OP_LD16I,       // ld16i rx, ra, cb -> u16 *p = ra; rx = p[cb]
    OP_LD32I,       // ld32i rx, ra, cb -> u32 *p = ra; rx = p[cb]
    OP_LD64I,       // ld64i rx, ra, cb -> u64 *p = ra; rx = p[cb]
    OP_LD8,         // ld8 rx, ra, rb -> u8 *p = ra; rx = p[rb]
    OP_LD16,        // ld16 rx, ra, rb -> u16 *p = ra; rx = p[rb]
    OP_LD32,        // ld32 rx, ra, rb -> u32 *p = ra; rx = p[rb]
    OP_LD64,        // ld64 rx, ra, rb -> u64 *p = ra; rx = p[rb]
    OP_ST8I,        // st8i rx, ra, cb -> u8 *p = ra; p[cb] = rx
    OP_ST16I,       // st16i rx, ra, cb -> u16 *p = ra; p[cb] = rx
    OP_ST32I,       // st32i rx, ra, cb -> u32 *p = ra; p[cb] = rx
    OP_ST64I,       // st64i rx, ra, cb -> u64 *p = ra; p[cb] = rx
    OP_ST8,         // st8 rx, ra, rb -> u8 *p = ra; p[rb] = rx
    OP_ST16,        // st16 rx, ra, rb -> u16 *p = ra; p[rb] = rx
    OP_ST32,        // st32 rx, ra, rb -> u32 *p = ra; p[rb] = rx
    OP_ST64,        // st64 rx, ra, rb -> u64 *p = ra; p[rb] = rx

    // Integer arithmetic (only int/s64 for now).
    OP_ADDI,        // addi rx, ra, cb
    OP_SUBI,        // subi rx, ra, cb
    OP_MULI,        // muli rx, ra, cb
    OP_DIVI,        // divi rx, ra, cb
    OP_MODI,        // modi rx, ra, cb
    OP_ADD,         // add rx, ra, rb
    OP_SUB,         // sub rx, ra, rb
    OP_MUL,         // mul rx, ra, rb
    OP_DIV,         // div rx, ra, rb
    OP_MOD,         // mod rx, ra, rb

    // Floating point arithmetic (only f64 for now).
    OP_ADDFI,       // addfi rx, ra, cb
    OP_SUBFI,       // subfi rx, ra, cb
    OP_MULFI,       // mulfi rx, ra, cb
    OP_DIVFI,       // divfi rx, ra, cb
    OP_MODFI,       // modfi rx, ra, cb
    OP_ADDF,        // addf rx, ra, rb
    OP_SUBF,        // subf rx, ra, rb
    OP_MULF,        // mulf rx, ra, rb
    OP_DIVF,        // divf rx, ra, rb
    OP_MODF,        // modf rx, ra, rb

    // Register-to-register copy.
    OP_MOV8,        // mov8 rx, ra -> rx = ra & 0xFF
    OP_MOV16,       // mov16 rx, ra -> rx = ra & 0xFFFF
    OP_MOV32,       // mov32 rx, ra -> rx = ra & 0xFFFFFFFF
    OP_MOV64,       // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF
    OP_MOV8I,       // mov8i rx, ca -> rx = ca & 0xFF
    OP_MOV16I,      // mov16i rx, ca -> rx = ca & 0xFFFF
    OP_MOV32I,      // mov32i rx, ca -> rx = ca & 0xFFFFFFFF
    OP_MOV64I,      // mov64i rx, ca -> rx = ca & 0xFFFFFFFFFFFFFFFF

    // Logic operations (only 64 bits for now).
    OP_EQI,         // eqi rx, ra, cb
    OP_NEQI,        // neqi rx, ra, cb
    OP_LTI,         // lti rx, ra, cb
    OP_GTI,         // gti rx, ra, cb
    OP_LEI,         // lei rx, ra, cb
    OP_GEI,         // gei rx, ra, cb
    OP_ANDI,        // andi rx, ra, cb
    OP_ORI,         // ori rx, ra, cb
    OP_NOTI,        // noti rx, ra
    OP_EQ,          // eq rx, ra, rb
    OP_NEQ,         // neq rx, ra, rb
    OP_LT,          // lt rx, ra, rb
    OP_GT,          // gt rx, ra, rb
    OP_LE,          // le rx, ra, rb
    OP_GE,          // ge rx, ra, rb
    OP_AND,         // and rx, ra, rb
    OP_OR,          // or rx, ra, rb
    OP_NOT,         // not rx, ra

    // Bitwise operations.
    OP_BITLSHIFTI,  // shli rx, ra, cb
    OP_BITRSHIFTI,  // shri rx, ra, cb
    OP_BITANDI,     // bandi rx, ra, cb
    OP_BITORI,      // bori rx, ra, cb
    OP_BITXORI,     // bxori rx, ra, cb
    OP_BITNOTI,     // bnoti rx, ca
    OP_BITLSHIFT,   // shl rx, ra, rb
    OP_BITRSHIFT,   // shr rx, ra, rb
    OP_BITAND,      // band rx, ra, rb
    OP_BITOR,       // bor rx, ra, rb
    OP_BITXOR,      // bxor rx, ra, rb
    OP_BITNOT,      // bnot rx, ra

    // Jump instructions.
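    // Jumps target label ids (lx) rather than raw code offsets: each label id
    // is mapped to an instruction index through chunk->labels by the
    // compile_* routines below.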
    OP_JMP,         // jmp lx ; jmp to label lx
    OP_JMPF,        // jmpf lx, rx ; jmp to label lx if rx is false
    OP_JMPT,        // jmpt lx, rx ; jmp to label lx if rx is true
    OP_JMPFI,       // jmpfi lx, cx ; jmp to label lx if cx is false
    OP_JMPTI,       // jmpti lx, cx ; jmp to label lx if cx is true
    _OP_NUM,
} OpCode;

Str op_str[] = {
    // High level ops.
    [OP_HALT] = cstr("HALT"),
    [OP_STGVAR] = cstr("STGVAR"), [OP_STGVARI] = cstr("STGVARI"),
    [OP_LDGVAR] = cstr("LDGVAR"), [OP_LDGADDR] = cstr("LDGADDR"),
    [OP_STLVAR] = cstr("STLVAR"), [OP_STLVARI] = cstr("STLVARI"),
    [OP_LDLVAR] = cstr("LDLVAR"), [OP_LDLADDR] = cstr("LDLADDR"),
    [OP_LDFUNPTR] = cstr("LDFUNPTR"), [OP_LDSTR] = cstr("LDSTR"),
    [OP_PRINTSTR] = cstr("PRNSTR"), [OP_PRINTSTRI] = cstr("PRNSTRI"),
    [OP_PRINTS8] = cstr("PRNS8"), [OP_PRINTS8I] = cstr("PRNS8I"),
    [OP_PRINTS16] = cstr("PRNS16"), [OP_PRINTS16I] = cstr("PRNS16I"),
    [OP_PRINTS32] = cstr("PRNS32"), [OP_PRINTS32I] = cstr("PRNS32I"),
    [OP_PRINTS64] = cstr("PRNS64"), [OP_PRINTS64I] = cstr("PRNS64I"),
    [OP_PRINTU8] = cstr("PRNU8"), [OP_PRINTU8I] = cstr("PRNU8I"),
    [OP_PRINTU16] = cstr("PRNU16"), [OP_PRINTU16I] = cstr("PRNU16I"),
    [OP_PRINTU32] = cstr("PRNU32"), [OP_PRINTU32I] = cstr("PRNU32I"),
    [OP_PRINTU64] = cstr("PRNU64"), [OP_PRINTU64I] = cstr("PRNU64I"),
    [OP_PRINTF32] = cstr("PRNF32"), [OP_PRINTF32I] = cstr("PRNF32I"),
    [OP_PRINTF64] = cstr("PRNF64"), [OP_PRINTF64I] = cstr("PRNF64I"),
    [OP_PRINTBOOL] = cstr("PRNBOOL"), [OP_PRINTBOOLI] = cstr("PRNBOOLI"),
    [OP_PUTRET] = cstr("PUTRET"), [OP_PUTRETI] = cstr("PUTRETI"),
    [OP_MEMCPY] = cstr("MEMCPY"), [OP_MEMCPYI] = cstr("MEMCPYI"),
    // Functions.
    [OP_CALL] = cstr("CALL"), [OP_RECUR] = cstr("RECUR"),
    [OP_RECUR_SELF] = cstr("RECURSLF"), [OP_RET] = cstr("RET"),
    [OP_RESERVE] = cstr("RESERVE"), [OP_POP] = cstr("POP"),
    [OP_PUSH] = cstr("PUSH"), [OP_PUSHI] = cstr("PUSHI"),
    // Load ops.
    [OP_LDCONST] = cstr("LDCONST"),
    [OP_LD8K] = cstr("LD8K"), [OP_LD16K] = cstr("LD16K"),
    [OP_LD32K] = cstr("LD32K"), [OP_LD64K] = cstr("LD64K"),
    [OP_ST8K] = cstr("ST8K"), [OP_ST16K] = cstr("ST16K"),
    [OP_ST32K] = cstr("ST32K"), [OP_ST64K] = cstr("ST64K"),
    [OP_LD8I] = cstr("LD8I"), [OP_LD16I] = cstr("LD16I"),
    [OP_LD32I] = cstr("LD32I"), [OP_LD64I] = cstr("LD64I"),
    [OP_LD8] = cstr("LD8"), [OP_LD16] = cstr("LD16"),
    [OP_LD32] = cstr("LD32"), [OP_LD64] = cstr("LD64"),
    // Store ops.
    [OP_ST8I] = cstr("ST8I"), [OP_ST16I] = cstr("ST16I"),
    [OP_ST32I] = cstr("ST32I"), [OP_ST64I] = cstr("ST64I"),
    [OP_ST8] = cstr("ST8"), [OP_ST16] = cstr("ST16"),
    [OP_ST32] = cstr("ST32"), [OP_ST64] = cstr("ST64"),
    // Arithmetic.
    [OP_ADDI] = cstr("ADDI"), [OP_SUBI] = cstr("SUBI"),
    [OP_MULI] = cstr("MULI"), [OP_DIVI] = cstr("DIVI"),
    [OP_MODI] = cstr("MODI"),
    [OP_ADD] = cstr("ADD"), [OP_SUB] = cstr("SUB"),
    [OP_MUL] = cstr("MUL"), [OP_DIV] = cstr("DIV"),
    [OP_MOD] = cstr("MOD"),
    [OP_ADDFI] = cstr("ADDFI"), [OP_SUBFI] = cstr("SUBFI"),
    [OP_MULFI] = cstr("MULFI"), [OP_DIVFI] = cstr("DIVFI"),
    [OP_MODFI] = cstr("MODFI"),
    [OP_ADDF] = cstr("ADDF"), [OP_SUBF] = cstr("SUBF"),
    [OP_MULF] = cstr("MULF"), [OP_DIVF] = cstr("DIVF"),
    [OP_MODF] = cstr("MODF"),
    // Reg copy/move.
    [OP_MOV8] = cstr("MOV8"), [OP_MOV16] = cstr("MOV16"),
    [OP_MOV32] = cstr("MOV32"), [OP_MOV64] = cstr("MOV64"),
    [OP_MOV8I] = cstr("MOV8I"), [OP_MOV16I] = cstr("MOV16I"),
    [OP_MOV32I] = cstr("MOV32I"), [OP_MOV64I] = cstr("MOV64I"),
    // Logic operations.
[OP_EQI] = cstr("EQI"), [OP_NEQI] = cstr("NEQI"), [OP_LTI] = cstr("LTI"), [OP_GTI] = cstr("GTI"), [OP_LEI] = cstr("LEI"), [OP_GEI] = cstr("GEI"), [OP_ANDI] = cstr("ANDI"), [OP_ORI] = cstr("ORI"), [OP_NOTI] = cstr("NOTI"), [OP_EQ] = cstr("EQ"), [OP_NEQ] = cstr("NEQ"), [OP_LT] = cstr("LT"), [OP_GT] = cstr("GT"), [OP_LE] = cstr("LE"), [OP_GE] = cstr("GE"), [OP_AND] = cstr("AND"), [OP_OR] = cstr("OR"), [OP_NOT] = cstr("NOT"), // Bitwise operations. [OP_BITLSHIFTI] = cstr("LSHI"), [OP_BITRSHIFTI] = cstr("RSHI"), [OP_BITANDI] = cstr("BANDI"), [OP_BITORI] = cstr("BORI"), [OP_BITXORI] = cstr("BXORI"), [OP_BITNOTI] = cstr("BNOTI"), [OP_BITLSHIFT] = cstr("LSH"), [OP_BITRSHIFT] = cstr("RSH"), [OP_BITAND] = cstr("BAND"), [OP_BITOR] = cstr("BOR"), [OP_BITXOR] = cstr("XBOR"), [OP_BITNOT] = cstr("BNOT"), // Jump instructions. [OP_JMP] = cstr("JMP"), [OP_JMPF] = cstr("JMPF"), [OP_JMPT] = cstr("JMPT"), [OP_JMPFI] = cstr("JMPFI"), [OP_JMPTI] = cstr("JMPTI"), }; typedef enum { COMP_NIL, COMP_CONST, COMP_STRING, COMP_REG, COMP_RET, COMP_ERR, } CompResultType; typedef struct CompResult { sz idx; CompResultType type; } CompResult; CompResult compile_expr(Compiler *compiler, Chunk *chunk, Node *node); void disassemble_chunk(Chunk chunk); sz add_constant(Chunk *chunk, sz value) { IntIntMap *map = intintmap_lookup(&chunk->intmap, value); // Make sure we don't have duplicated constants. if (!map) { map = intintmap_insert(&chunk->intmap, value, chunk->const_idx++, chunk->storage); Constant c = (Constant){.i = value}; array_push(chunk->constants, c, chunk->storage); } return map->val; } sz add_string(Chunk *chunk, Str string) { // Make sure we don't have duplicated string. StrIntMap *map = strintmap_lookup(&chunk->strmap, string); if (!map) { map = strintmap_insert(&chunk->strmap, string, chunk->str_idx++, chunk->storage); array_push(chunk->strings, string, chunk->storage); } return map->val; } void emit_op(OpCode op, sz dst, sz a, sz b, Node *node, Chunk *chunk) { Instruction inst = (Instruction){ .op = op, .dst = dst, .a = a, .b = b, }; array_push(chunk->code, inst, chunk->storage); LineCol linecol = (LineCol){0}; if (node) { linecol = (LineCol){.line = node->line, .col = node->col}; } array_push(chunk->linecol, linecol, chunk->storage); } void emit_sized_op(sz size, OpCode op64, OpCode op32, OpCode op16, OpCode op8, sz dst, sz a, sz b, Node *node, Chunk *chunk) { if (size == 8) { emit_op(op64, dst, a, b, node, chunk); } else if (size == 4) { emit_op(op32, dst, a, b, node, chunk); } else if (size == 2) { emit_op(op16, dst, a, b, node, chunk); } else if (size == 1) { emit_op(op8, dst, a, b, node, chunk); } } void emit_fat_copy(Chunk *chunk, Node *node, sz dst_addr, sz src_addr) { sz reg_dst = chunk->reg_idx++; // Store the fat string pointer into the variable. sz zero = add_constant(chunk, 0); sz one = add_constant(chunk, 1); // Get the value for the first word of the string // pointer. 
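    // A Str is copied as a two-word (2 x 64-bit) fat pointer, presumably a
    // data pointer plus a length (Str itself is defined elsewhere, likely in
    // badlib.h). Both words go through the same scratch register reg_dst.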
emit_op(OP_LD64I, reg_dst, src_addr, zero, node, chunk); emit_op(OP_ST64I, reg_dst, dst_addr, zero, node, chunk); emit_op(OP_LD64I, reg_dst, src_addr, one, node, chunk); emit_op(OP_ST64I, reg_dst, dst_addr, one, node, chunk); } void emit_compile_err(Compiler *compiler, Chunk *chunk, Node *node) { disassemble_chunk(*chunk); eprintln("%s:%d:%d: error: compilation error on: %s", compiler->file_name, node->line, node->col, node_str[node->kind]); exit(EXIT_FAILURE); } CompResult compile_binary(Compiler *compiler, Chunk *chunk, Node *node) { OpCode op = OP_HALT; OpCode opi = OP_HALT; OpCode ldop = OP_LDCONST; switch (node->kind) { // Arithmetic. case NODE_ADD: { if (strset_lookup(&compiler->integer_types, node->type)) { op = OP_ADD; opi = OP_ADDI; } else if (strset_lookup(&compiler->float_types, node->type)) { op = OP_ADDF; opi = OP_ADDFI; } } break; case NODE_SUB: { if (strset_lookup(&compiler->integer_types, node->type)) { op = OP_SUB; opi = OP_SUBI; } else if (strset_lookup(&compiler->float_types, node->type)) { op = OP_SUBF; opi = OP_SUBFI; } } break; case NODE_MUL: { if (strset_lookup(&compiler->integer_types, node->type)) { op = OP_MUL; opi = OP_MULI; } else if (strset_lookup(&compiler->float_types, node->type)) { op = OP_MULF; opi = OP_MULFI; } } break; case NODE_DIV: { if (strset_lookup(&compiler->integer_types, node->type)) { op = OP_DIV; opi = OP_DIVI; } else if (strset_lookup(&compiler->float_types, node->type)) { op = OP_DIVF; opi = OP_DIVFI; } } break; case NODE_MOD: { if (strset_lookup(&compiler->integer_types, node->type)) { op = OP_MOD; opi = OP_MODI; } else if (strset_lookup(&compiler->float_types, node->type)) { op = OP_MODF; opi = OP_MODFI; } } break; // Logic. case NODE_EQ: { op = OP_EQ; opi = OP_EQI; } break; case NODE_NEQ: { op = OP_NEQ; opi = OP_NEQI; } break; case NODE_LT: { op = OP_LT; opi = OP_LTI; } break; case NODE_GT: { op = OP_GT; opi = OP_GTI; } break; case NODE_LE: { op = OP_LE; opi = OP_LEI; } break; case NODE_GE: { op = OP_GE; opi = OP_GEI; } break; // Bitwise. case NODE_BITOR: { op = OP_BITOR; opi = OP_BITORI; } break; case NODE_BITXOR: { op = OP_BITXOR; opi = OP_BITXORI; } break; case NODE_BITAND: { op = OP_BITAND; opi = OP_BITANDI; } break; case NODE_BITLSHIFT: { op = OP_BITLSHIFT; opi = OP_BITLSHIFTI; } break; case NODE_BITRSHIFT: { op = OP_BITRSHIFT; opi = OP_BITRSHIFTI; } break; default: break; } CompResult comp_a = compile_expr(compiler, chunk, node->binary.left); CompResult comp_b = compile_expr(compiler, chunk, node->binary.right); sz reg_a; sz reg_b; switch (comp_a.type) { case COMP_CONST: { reg_a = chunk->reg_idx++; emit_op(ldop, reg_a, comp_a.idx, 0, node, chunk); } break; case COMP_REG: { reg_a = comp_a.idx; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } switch (comp_b.type) { case COMP_CONST: { reg_b = comp_b.idx; op = opi; } break; case COMP_REG: { reg_b = comp_b.idx; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } sz reg_dst = chunk->reg_idx++; // Better for optimization emit_op(op, reg_dst, reg_a, reg_b, node, chunk); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } CompResult compile_binary_logic(Compiler *compiler, Chunk *chunk, Node *node) { // Logical functions have to shortcircuit once the answer is known. 
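    // Sketch of the emitted shape for `a and b` (OR mirrors it with JMPT and
    // a default of true):
    //     <eval a>            ; result in reg_a (or a constant)
    //     jmpf lab0, reg_a    ; short-circuit: a is false, skip b
    //     <eval b>
    //     and  reg_dst, reg_a, reg_b
    //     jmp  lab1
    //   lab0:
    //     ldconst reg_dst, <false>
    //   lab1: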
OpCode op = OP_HALT; OpCode opi = OP_HALT; OpCode ldop = OP_LDCONST; OpCode jmpop = OP_HALT; OpCode jmpopi = OP_HALT; bool default_value = false; switch (node->kind) { case NODE_AND: { op = OP_AND; opi = OP_ANDI; jmpop = OP_JMPF; jmpopi = OP_JMPFI; default_value = false; } break; case NODE_OR: { op = OP_OR; opi = OP_ORI; jmpop = OP_JMPT; jmpopi = OP_JMPTI; default_value = true; } break; default: break; } sz lab0 = chunk->labels_idx++; sz lab1 = chunk->labels_idx++; CompResult comp_a = compile_expr(compiler, chunk, node->binary.left); sz reg_a; switch (comp_a.type) { case COMP_CONST: { emit_op(jmpopi, lab0, comp_a.idx, 0, node->binary.left, chunk); reg_a = chunk->reg_idx++; emit_op(ldop, reg_a, comp_a.idx, 0, node, chunk); } break; case COMP_REG: { emit_op(jmpop, lab0, comp_a.idx, 0, node->binary.left, chunk); reg_a = comp_a.idx; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } CompResult comp_b = compile_expr(compiler, chunk, node->binary.right); sz reg_b; switch (comp_b.type) { case COMP_CONST: { reg_b = comp_b.idx; op = opi; } break; case COMP_REG: { reg_b = comp_b.idx; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } sz reg_dst = chunk->reg_idx++; emit_op(op, reg_dst, reg_a, reg_b, node, chunk); // Jump to the end of this comparison. emit_op(OP_JMP, lab1, 0, 0, node, chunk); sz pos0 = array_size(chunk->code); // Load default value. sz defaul_const = add_constant(chunk, default_value); emit_op(ldop, reg_dst, defaul_const, 0, node, chunk); sz pos1 = array_size(chunk->code); // Register labels. intintmap_insert(&chunk->labels, lab0, pos0, chunk->storage); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } CompResult compile_unary(Compiler *compiler, Chunk *chunk, Node *node) { OpCode op = OP_HALT; OpCode opi = OP_HALT; switch (node->kind) { case NODE_NOT: { op = OP_NOT; opi = OP_NOTI; } break; case NODE_BITNOT: { op = OP_BITNOT; opi = OP_BITNOTI; } break; default: break; } CompResult comp_a = compile_expr(compiler, chunk, node->binary.left); sz reg_a; switch (comp_a.type) { case COMP_CONST: { reg_a = comp_a.idx; op = opi; } break; case COMP_REG: { reg_a = comp_a.idx; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } sz reg_dst = chunk->reg_idx++; emit_op(op, reg_dst, reg_a, 0, node, chunk); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } CompResult compile_if(Compiler *compiler, Chunk *chunk, Node *node) { CompResult cond = compile_expr(compiler, chunk, node->ifelse.cond); OpCode jmpop; switch (cond.type) { case COMP_CONST: { jmpop = OP_JMPFI; } break; case COMP_REG: { jmpop = OP_JMPF; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } if (!str_eq(node->type, cstr("nil")) && !str_has_prefix(node->type, cstr("ret:")) && !str_has_prefix(node->type, cstr("flow:"))) { sz reg_dst = chunk->reg_idx++; // Jump to the `false` branch. sz lab0 = chunk->labels_idx++; emit_op(jmpop, lab0, cond.idx, 0, node->ifelse.cond, chunk); // Condition is true. 
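    // Emitted layout for a value-producing if/else:
    //     <eval cond>
    //     jmpf lab0, cond     ; skip the true branch when cond is false
    //     <true branch>       ; result copied into reg_dst
    //     jmp  lab1
    //   lab0:
    //     <else branch>       ; result copied into reg_dst
    //   lab1:
    // lab0 is registered at pos0 + 1 so it lands just after the JMP that
    // terminates the true branch.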
CompResult then_expr = compile_expr(compiler, chunk, node->ifelse.expr_true); switch (then_expr.type) { case COMP_CONST: { emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0, node->ifelse.cond, chunk); } break; case COMP_REG: { emit_op(OP_MOV64, reg_dst, then_expr.idx, 0, node->ifelse.cond, chunk); } break; case COMP_RET: break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } // Jump to the end of the expression. sz pos0 = array_size(chunk->code); sz lab1 = chunk->labels_idx++; emit_op(OP_JMP, lab1, 0, 0, node->ifelse.expr_else, chunk); // Else expression. CompResult else_expr = compile_expr(compiler, chunk, node->ifelse.expr_else); switch (else_expr.type) { case COMP_CONST: { emit_op(OP_LDCONST, reg_dst, else_expr.idx, 0, node->ifelse.expr_else, chunk); } break; case COMP_REG: { emit_op(OP_MOV64, reg_dst, else_expr.idx, 0, node->ifelse.expr_else, chunk); } break; case COMP_RET: break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } sz pos1 = array_size(chunk->code); // Update labels. intintmap_insert(&chunk->labels, lab0, pos0 + 1, chunk->storage); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } // Jump to the `false` branch. sz lab0 = chunk->labels_idx++; emit_op(jmpop, lab0, cond.idx, 0, node->ifelse.cond, chunk); // Condition is true. compile_expr(compiler, chunk, node->ifelse.expr_true); // Jump to the end of the expression. sz pos0 = array_size(chunk->code); sz lab1 = chunk->labels_idx++; emit_op(OP_JMP, lab1, 0, 0, node->ifelse.expr_else, chunk); // Else expression. if (node->ifelse.expr_else) { compile_expr(compiler, chunk, node->ifelse.expr_else); } sz pos1 = array_size(chunk->code); // Update labels. intintmap_insert(&chunk->labels, lab0, pos0 + 1, chunk->storage); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); return (CompResult){.type = COMP_NIL}; } CompResult compile_cond(Compiler *compiler, Chunk *chunk, Node *node) { if (str_eq(node->type, cstr("nil"))) { sz lab1 = chunk->labels_idx++; for (sz i = 0; i < array_size(node->match.cases); i++) { // condition = expression Node *expr = node->match.cases[i]; if (expr->case_entry.cond) { CompResult cond = compile_expr(compiler, chunk, expr->case_entry.cond); OpCode jmpop; switch (cond.type) { case COMP_CONST: { jmpop = OP_JMPFI; } break; case COMP_REG: { jmpop = OP_JMPF; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } // Jump to the `next` branch. sz lab0 = chunk->labels_idx++; emit_op(jmpop, lab0, cond.idx, 0, expr->case_entry.expr, chunk); // Condition is true. compile_expr(compiler, chunk, expr->case_entry.expr); if (i != array_size(node->match.cases) - 1) { // Jump to the end of the expression. 
sz pos0 = array_size(chunk->code); emit_op(OP_JMP, lab1, 0, 0, node->ifelse.expr_else, chunk); intintmap_insert(&chunk->labels, lab0, pos0 + 1, chunk->storage); } } else { compile_expr(compiler, chunk, expr->case_entry.expr); break; } } sz pos1 = array_size(chunk->code); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); return (CompResult){.type = COMP_NIL}; } sz reg_dst = chunk->reg_idx++; sz lab1 = chunk->labels_idx++; for (sz i = 0; i < array_size(node->match.cases); i++) { // condition = expression Node *expr = node->match.cases[i]; if (expr->case_entry.cond) { CompResult cond = compile_expr(compiler, chunk, expr->case_entry.cond); OpCode jmpop; switch (cond.type) { case COMP_CONST: { jmpop = OP_JMPFI; } break; case COMP_REG: { jmpop = OP_JMPF; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } // Jump to the `next` branch. sz lab0 = chunk->labels_idx++; emit_op(jmpop, lab0, cond.idx, 0, expr->case_entry.expr, chunk); // Condition is true. CompResult then_expr = compile_expr(compiler, chunk, expr->case_entry.expr); switch (then_expr.type) { case COMP_CONST: { emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0, expr->case_entry.expr, chunk); } break; case COMP_REG: { emit_op(OP_MOV64, reg_dst, then_expr.idx, 0, expr->case_entry.expr, chunk); } break; case COMP_RET: break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } if (i != array_size(node->match.cases) - 1) { // Jump to the end of the expression. sz pos0 = array_size(chunk->code); emit_op(OP_JMP, lab1, 0, 0, node->ifelse.expr_else, chunk); intintmap_insert(&chunk->labels, lab0, pos0 + 1, chunk->storage); } } else { CompResult then_expr = compile_expr(compiler, chunk, expr->case_entry.expr); switch (then_expr.type) { case COMP_CONST: { emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0, expr->case_entry.expr, chunk); } break; case COMP_REG: { emit_op(OP_MOV64, reg_dst, then_expr.idx, 0, expr->case_entry.expr, chunk); } break; case COMP_RET: break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } break; } } sz pos1 = array_size(chunk->code); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } CompResult compile_break(Compiler *compiler, Chunk *chunk, Node *node) { emit_op(OP_JMP, compiler->lab_post, 0, 0, node, chunk); return (CompResult){.type = COMP_NIL}; } CompResult compile_continue(Compiler *compiler, Chunk *chunk, Node *node) { emit_op(OP_JMP, compiler->lab_pre, 0, 0, node, chunk); return (CompResult){.type = COMP_NIL}; } CompResult compile_while(Compiler *compiler, Chunk *chunk, Node *node) { sz lab0 = chunk->labels_idx++; sz lab1 = chunk->labels_idx++; sz pos1 = array_size(chunk->code); CompResult cond = compile_expr(compiler, chunk, node->loop.cond); OpCode jmpop; switch (cond.type) { case COMP_CONST: { jmpop = OP_JMPFI; } break; case COMP_REG: { jmpop = OP_JMPF; } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } // Jump to the `end of the loop` branch. emit_op(jmpop, lab0, cond.idx, 0, node->loop.cond, chunk); // Condition is true. compiler->lab_pre = lab1; compiler->lab_post = lab0; compile_expr(compiler, chunk, node->loop.expr); sz pos0 = array_size(chunk->code); emit_op(OP_JMP, lab1, 0, 0, node, chunk); // Update labels. 
intintmap_insert(&chunk->labels, lab0, pos0 + 1, chunk->storage); intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage); // Return. return (CompResult){.type = COMP_NIL}; } CompResult compile_tail_call(Compiler *compiler, Chunk *chunk, Node *node, sz fun_idx) { // Update the local parameters. for (sz i = 0; i < array_size(node->elements); i++) { Node *expr = node->elements[i]; CompResult result = compile_expr(compiler, chunk, expr); switch (result.type) { case COMP_CONST: { emit_op(OP_STLVARI, i, result.idx, 0, node, chunk); } break; case COMP_REG: { if (str_eq(expr->type, cstr("Str"))) { sz var_addr = chunk->reg_idx++; sz str_addr = result.idx; emit_op(OP_LDLADDR, var_addr, i, 0, node, chunk); emit_fat_copy(chunk, node, var_addr, str_addr); } else { emit_op(OP_STLVAR, i, result.idx, 0, node, chunk); } } break; case COMP_STRING: { sz var_addr = chunk->reg_idx++; sz str_addr = chunk->reg_idx++; emit_op(OP_LDLADDR, var_addr, i, 0, node, chunk); emit_op(OP_LDSTR, str_addr, result.idx, 0, node, chunk); emit_fat_copy(chunk, node, var_addr, str_addr); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } if (fun_idx == chunk->fun.index) { emit_op(OP_RECUR_SELF, fun_idx, 0, 0, node, chunk); } else { emit_op(OP_RECUR, fun_idx, 0, 0, node, chunk); } return (CompResult){.type = COMP_NIL}; } CompResult compile_funcall(Compiler *compiler, Chunk *chunk, Node *node) { // Get variable information. Str name = node->unique_name; SymbolMap *map = symmap_lookup(&compiler->symbols, name); // Builtins. if (map->val.kind == SYM_BUILTIN_FUN) { if (str_eq(name, cstr("print")) || str_eq(name, cstr("println"))) { for (sz i = 0; i < array_size(node->elements); i++) { Node *expr = node->elements[i]; Str type_name = expr->type; // NOTE: slices [] and dynamic arrays [...] should be treated // differently from static arrays. 
if (str_has_prefix(type_name, cstr("@")) || str_has_prefix(type_name, cstr("["))) { type_name = cstr("Ptr"); } SymbolMap *map = symmap_lookup(&compiler->symbols, type_name); sz type_size = map->val.t.size; CompResult result = compile_expr(compiler, chunk, expr); if (strset_lookup(&compiler->sint_types, type_name)) { switch (result.type) { case COMP_CONST: { emit_sized_op(type_size, OP_PRINTS64I, OP_PRINTS32I, OP_PRINTS16I, OP_PRINTS8I, result.idx, 0, 0, expr, chunk); } break; case COMP_REG: { emit_sized_op(type_size, OP_PRINTS64, OP_PRINTS32, OP_PRINTS16, OP_PRINTS8, result.idx, 0, 0, expr, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } else if (strset_lookup(&compiler->uint_types, type_name)) { switch (result.type) { case COMP_CONST: { emit_sized_op(type_size, OP_PRINTU64I, OP_PRINTU32I, OP_PRINTU16I, OP_PRINTU8I, result.idx, 0, 0, expr, chunk); } break; case COMP_REG: { emit_sized_op(type_size, OP_PRINTU64, OP_PRINTU32, OP_PRINTU16, OP_PRINTU8, result.idx, 0, 0, expr, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } else if (strset_lookup(&compiler->float_types, type_name)) { switch (result.type) { case COMP_CONST: { if (type_size == 8) { emit_op(OP_PRINTF64I, result.idx, 0, 0, expr, chunk); } else { emit_op(OP_PRINTF32I, result.idx, 0, 0, expr, chunk); } } break; case COMP_REG: { if (type_size == 8) { emit_op(OP_PRINTF64, result.idx, 0, 0, expr, chunk); } else { emit_op(OP_PRINTF32, result.idx, 0, 0, expr, chunk); } } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } else if (str_eq(type_name, cstr("Str"))) { switch (result.type) { case COMP_STRING: { emit_op(OP_PRINTSTRI, result.idx, 0, 0, expr, chunk); } break; case COMP_REG: { emit_op(OP_PRINTSTR, result.idx, 0, 0, expr, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } else if (str_eq(type_name, cstr("Bool"))) { switch (result.type) { case COMP_CONST: { emit_op(OP_PRINTBOOLI, result.idx, 0, 0, expr, chunk); } break; case COMP_REG: { emit_op(OP_PRINTBOOL, result.idx, 0, 0, expr, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } } if (str_eq(name, cstr("println"))) { sz idx = add_string(chunk, cstr("\n")); emit_op(OP_PRINTSTRI, idx, 0, 0, node, chunk); } return (CompResult){.type = COMP_NIL}; } else if (str_eq(name, cstr("sizeof"))) { // Find size of the given type. // Node *expr = node->elements[0]; // Str type_name = expr->value.str; // // Try to find the type on the table. // TypeMap *t = typemap_lookup(&compiler->type_map, type_name); // sz size = 0; // if (t) { // size = t->val.size; // } else { // // If that's not possible, try resolving the symbol. // Str name = expr->unique_name; // VarMap *map = NULL; // Chunk *next = chunk; // while (next) { // map = varmap_lookup(&next->varmap, name); // if (map) { // break; // } // next = next->parent; // } // if (!map) { // emit_compile_err(compiler, chunk, expr); // return (CompResult){.type = COMP_ERR}; // } // Variable var = map->val; // size = var_size; // FIXME: type size and tables is better done on semantic // analyzer, // enough bloat here already. 
// sz reg_dst = chunk->reg_idx++; // sz const_idx = add_constant(chunk, size); // emit_op(OP_LDCONST, reg_dst, const_idx, 0, node, chunk); // return (CompResult){.type = COMP_REG, .idx = reg_dst}; // } } } else if (map->val.kind == SYM_GLOBALVAR || map->val.kind == SYM_LOCALVAR) { // TODO: It's a function pointer. } // Check for tail recursive opportunities. Function *fun = map->val.fun; if (str_eq(fun->type, chunk->fun.type) && chunk->id != 0) { Node *parent = node->parent; Node *current = node; bool tail_recursive = true; while (parent != NULL) { switch (parent->kind) { case NODE_BLOCK: { sz idx = array_size(parent->statements) - 1; if (parent->statements[idx] != node) { tail_recursive = false; break; } } break; case NODE_WHILE: { if (current == parent->loop.cond) { tail_recursive = false; break; } } break; case NODE_IF: { if (current == parent->ifelse.cond) { tail_recursive = false; break; } } break; case NODE_FUN: { sz idx = array_size(parent->func.body->statements) - 1; if (parent->func.body->statements[idx] != current) { tail_recursive = false; break; } break; } break; case NODE_MATCH: { if (current == parent->match.expr) { tail_recursive = false; break; } } break; case NODE_COND: break; case NODE_CASE_COND: { if (current == parent->case_entry.cond) { tail_recursive = false; break; } } break; default: { tail_recursive = false; break; } break; } parent = parent->parent; current = current->parent; } if (tail_recursive) { return compile_tail_call(compiler, chunk, node, fun->index); } } // Reserve space for the return value if needed. if (fun->return_arity > 0) { // Put the return data into a register sz ret_size = add_constant(chunk, 8); emit_op(OP_RESERVE, ret_size, 0, 0, node, chunk); } // Send parameters to the stack. for (sz i = 0; i < array_size(node->elements); i++) { Node *expr = node->elements[i]; CompResult result = compile_expr(compiler, chunk, expr); Str type_name = expr->type; SymbolMap *map = symmap_lookup(&compiler->symbols, type_name); sz type_size = map->val.t.size; switch (result.type) { case COMP_CONST: { emit_op(OP_PUSHI, result.idx, 0, 0, expr, chunk); } break; case COMP_REG: { if (str_eq(expr->type, cstr("Str"))) { sz str_addr = result.idx; // Store the fat string pointer into the stack. sz reg_dst = chunk->reg_idx++; sz zero = add_constant(chunk, 0); sz one = add_constant(chunk, 1); emit_sized_op(type_size, OP_LD64I, OP_LD32I, OP_LD16I, OP_LD8I, reg_dst, str_addr, zero, node, chunk); emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); emit_sized_op(type_size, OP_LD64I, OP_LD32I, OP_LD16I, OP_LD8I, reg_dst, str_addr, one, node, chunk); emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); } else { emit_op(OP_PUSH, result.idx, 0, 0, expr, chunk); } } break; case COMP_STRING: { // Get the address for the string value variable. sz str_addr = chunk->reg_idx++; emit_op(OP_LDSTR, str_addr, result.idx, 0, node->var.val, chunk); // Store the fat string pointer into the stack. sz reg_dst = chunk->reg_idx++; sz zero = add_constant(chunk, 0); sz one = add_constant(chunk, 1); emit_op(OP_LD64I, reg_dst, str_addr, zero, node, chunk); emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); emit_op(OP_LD64I, reg_dst, str_addr, one, node, chunk); emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } } emit_op(OP_CALL, fun->index, 0, 0, node, chunk); // Only one return parameter for now. if (fun->return_arity > 0) { // FIXME: This doesn't account for returning a value > WORD_SIZE. 
// Put the return data into a register sz reg_dst = chunk->reg_idx++; emit_op(OP_POP, reg_dst, 0, 0, node, chunk); return (CompResult){.type = COMP_REG, .idx = reg_dst}; } return (CompResult){.type = COMP_NIL}; } CompResult compile_return(Compiler *compiler, Chunk *chunk, Node *node) { for (sz i = 0; i < array_size(node->elements); i++) { Node *expr = node->elements[i]; CompResult res = compile_expr(compiler, chunk, expr); // TODO: Only one return field for now, but this is the idea. // Put return values into memory. switch (res.type) { case COMP_CONST: { emit_op(OP_PUTRETI, res.idx, 0, 0, node, chunk); } break; case COMP_REG: { emit_op(OP_PUTRET, res.idx, 0, 0, node, chunk); } break; case COMP_NIL: break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } break; } emit_op(OP_RET, 0, 0, 0, node, chunk); return (CompResult){.type = COMP_RET}; } Chunk * chunk_alloc(Compiler *compiler) { Chunk *chunk = arena_calloc((sz)sizeof(Chunk), compiler->storage); chunk->storage = compiler->storage; chunk->file_name = compiler->file_name; return chunk; } void verify_chunk(Chunk *chunk) { assert(chunk); if (chunk->const_idx >= 0xFFFF) { eprintln("too many constants on chunk %d: %d", chunk->id, chunk->const_idx); exit(EXIT_FAILURE); } if (chunk->str_idx >= 0xFFFF) { eprintln("too many strings on chunk %d: %d", chunk->id, chunk->str_idx); exit(EXIT_FAILURE); } if (chunk->reg_idx >= 0xFFFF) { eprintln("too many registers used on chunk %d: %d", chunk->id, chunk->reg_idx); exit(EXIT_FAILURE); } if (chunk->labels_idx >= 0xFFFF) { eprintln("too many labels used on chunk %d: %d", chunk->id, chunk->labels_idx); exit(EXIT_FAILURE); } } CompResult compile_let(Compiler *compiler, Chunk *chunk, Node *node) { // Get variable information. Str name = node->var.name->unique_name; SymbolMap *map = symmap_lookup(&compiler->symbols, name); sz idx = map->val.var.idx; sz type_size = map->val.var.type.size; sz var_size = map->val.var.size; sz op_ldaddr = map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR; // Value. if (node->var.val) { CompResult res = compile_expr(compiler, chunk, node->var.val); switch (res.type) { case COMP_CONST: { sz reg_addr = chunk->reg_idx++; sz reg_dst = chunk->reg_idx++; emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk); emit_op(OP_LDCONST, reg_dst, res.idx, 0, node, chunk); emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K, reg_dst, reg_addr, 0, node, chunk); } break; case COMP_REG: { sz reg_addr = chunk->reg_idx++; sz reg_val = res.idx; emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk); if (var_size > 8) { sz size_const = add_constant(chunk, var_size); emit_op(OP_MEMCPYI, reg_addr, reg_val, size_const, node, chunk); } else { emit_sized_op(var_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K, reg_val, reg_addr, 0, node, chunk); } } break; case COMP_STRING: { // Get the address for the string value variable. sz str_addr = chunk->reg_idx++; emit_op(OP_LDSTR, str_addr, res.idx, 0, node->var.val, chunk); // Get the address for the local/global storage // variable. sz var_addr = chunk->reg_idx++; emit_op(op_ldaddr, var_addr, idx, 0, node, chunk); // Copy the fat pointer. 
                emit_fat_copy(chunk, node, var_addr, str_addr);
            } break;
            default: {
                emit_compile_err(compiler, chunk, node);
                return (CompResult){.type = COMP_ERR};
            } break;
        }
    }
    return (CompResult){.type = COMP_NIL};
}

CompResult
compile_set(Compiler *compiler, Chunk *chunk, Node *node) {
    CompResult res_name = compile_expr(compiler, chunk, node->var.name);
    CompResult res_val = compile_expr(compiler, chunk, node->var.val);

    // Get variable information.
    Str type = node->var.name->type;
    SymbolMap *map = symmap_lookup(&compiler->symbols, type);
    sz type_size = map->val.t.size;

    switch (res_val.type) {
        case COMP_CONST: {
            sz reg_dst = chunk->reg_idx++;
            emit_op(OP_LDCONST, reg_dst, res_val.idx, 0, node, chunk);
            emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K,
                          reg_dst, res_name.idx, 0, node, chunk);
        } break;
        case COMP_REG: {
            if (type_size > 8) {
                sz size_const = add_constant(chunk, type_size);
                emit_op(OP_MEMCPYI, res_name.idx, res_val.idx, size_const,
                        node, chunk);
            } else {
                emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K,
                              res_val.idx, res_name.idx, 0, node, chunk);
            }
        } break;
        case COMP_STRING: {
            // Get the address for the string value variable.
            sz str_addr = chunk->reg_idx++;
            emit_op(OP_LDSTR, str_addr, res_val.idx, 0, node->var.val, chunk);

            // Copy the fat pointer.
            emit_fat_copy(chunk, node, res_name.idx, str_addr);
        } break;
        default: {
            emit_compile_err(compiler, chunk, node);
            return (CompResult){.type = COMP_ERR};
        } break;
    }
    return (CompResult){.type = COMP_NIL};
}

CompResult
compile_dot(Compiler *compiler, Chunk *chunk, Node *node) {
    CompResult left = compile_expr(compiler, chunk, node->binary.left);

    // Need to pass this to the child functions so they can properly increment
    // the address.
    compiler->has_addr = true;
    compiler->reg_addr = left.idx;

    // Pointers can be accessed directly with dot syntax, no need for a->b.
    if (str_has_prefix(node->binary.left->type, cstr("@"))) {
        compiler->reg_addr = chunk->reg_idx++;
        emit_op(OP_LD64K, compiler->reg_addr, left.idx, 0, node, chunk);
    }
    CompResult right = compile_expr(compiler, chunk, node->binary.right);
    compiler->has_addr = false;
    return (CompResult){.type = COMP_REG, .idx = right.idx};
}

CompResult
compile_symbol(Compiler *compiler, Chunk *chunk, Node *node) {
    Str name = node->unique_name;
    Str type_name = node->type;
    SymbolMap *map = symmap_lookup(&compiler->symbols, name);

    // For a struct field, the base address must be passed via
    // compiler->reg_addr. This will calculate the offset and return an
    // address or the full value.
    if (map->val.kind == SYM_STRUCT_FIELD) {
        sz size = map->val.t.element_size;
        sz off = add_constant(chunk, map->val.t.offset);
        sz reg_addr = chunk->reg_idx++;
        emit_op(OP_ADDI, reg_addr, compiler->reg_addr, off, node, chunk);
        if (node->lvalue) {
            return (CompResult){.type = COMP_CONST, .idx = reg_addr};
        }
        sz reg_dst = chunk->reg_idx++;
        emit_sized_op(size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K, reg_dst,
                      reg_addr, 0, node, chunk);
        return (CompResult){.type = COMP_REG, .idx = reg_dst};
    }
    if (map->val.kind == SYM_FUN) {
        sz reg_dst = chunk->reg_idx++;
        sz fun_idx = map->val.fun->index;
        emit_op(OP_LDFUNPTR, reg_dst, fun_idx, 0, node, chunk);
        return (CompResult){.type = COMP_REG, .idx = reg_dst};
    }

    // If we are here, the symbol is a variable.
    sz size = map->val.var.type.size;
    sz idx = map->val.var.idx;
    sz op_ldaddr = map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR;
    sz reg_addr = chunk->reg_idx++;
    sz reg_dst = reg_addr;
    emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk);

    // Indexing a pointer to static array is allowed.
if (node->parent && node->parent->kind == NODE_INDEX) { if (str_has_prefix(type_name, cstr("@["))) { emit_op(OP_LD64K, reg_addr, reg_addr, 0, node, chunk); } size = map->val.var.type.element_size; } SymbolMap *tmap = symmap_lookup(&compiler->symbols, type_name); if (compiler->has_addr) { emit_op(OP_ADD, reg_addr, reg_addr, compiler->reg_addr, node, chunk); } // TODO: move these `str_` checks into the type system and check for // tmap->val.kind or size? Likewise, strings should be treated as a []U8 // slice, which is what they are after all. if (node->lvalue || str_eq(type_name, cstr("Str")) || tmap->val.kind == SYM_STRUCT) { } else { emit_sized_op(size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K, reg_dst, reg_addr, 0, node, chunk); // TODO: if variable is too big and we want to write it, put it on the // stack. } return (CompResult){.type = COMP_REG, .idx = reg_dst}; } CompResult compile_index(Compiler *compiler, Chunk *chunk, Node *node) { // Load a register with the zero offset. sz reg_offset = 0; if (compiler->has_addr) { reg_offset = compiler->reg_addr; } else { reg_offset = chunk->reg_idx++; sz base_addr = add_constant(chunk, 0); emit_op(OP_LDCONST, reg_offset, base_addr, 0, node, chunk); } Node *next = node; while (next && next->t.next) { Str type_name = next->type; SymbolMap *map = symmap_lookup(&compiler->symbols, type_name); sz type_size = map->val.t.size; sz size = add_constant(chunk, type_size); sz reg_mul = chunk->reg_idx++; CompResult res = compile_expr(compiler, chunk, next->idx.value); switch (res.type) { case COMP_CONST: { sz reg_idx = chunk->reg_idx++; emit_op(OP_LDCONST, reg_idx, res.idx, 0, node, chunk); emit_op(OP_MULI, reg_mul, reg_idx, size, node, chunk); } break; case COMP_REG: { emit_op(OP_MULI, reg_mul, res.idx, size, node, chunk); } break; default: { emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } emit_op(OP_ADD, reg_offset, reg_offset, reg_mul, node, chunk); next = next->t.next; if (next->t.next) { // Ensure we are maintaining single assignment policy. sz tmp = chunk->reg_idx++; emit_op(OP_MOV64, tmp, reg_offset, reg_mul, node, chunk); reg_offset = tmp; } } compiler->reg_addr = reg_offset; compiler->has_addr = true; CompResult res = compile_expr(compiler, chunk, next); compiler->has_addr = false; return res; } CompResult compile_expr(Compiler *compiler, Chunk *chunk, Node *node) { switch (node->kind) { case NODE_DOT: return compile_dot(compiler, chunk, node); case NODE_BREAK: return compile_break(compiler, chunk, node); case NODE_CONTINUE: return compile_continue(compiler, chunk, node); case NODE_RETURN: return compile_return(compiler, chunk, node); case NODE_FUN: return (CompResult){.type = COMP_NIL}; case NODE_FUNCALL: return compile_funcall(compiler, chunk, node); case NODE_WHILE: return compile_while(compiler, chunk, node); case NODE_IF: return compile_if(compiler, chunk, node); case NODE_COND: return compile_cond(compiler, chunk, node); // Logic. case NODE_BITNOT: case NODE_NOT: return compile_unary(compiler, chunk, node); break; case NODE_AND: case NODE_OR: return compile_binary_logic(compiler, chunk, node); break; case NODE_EQ: case NODE_NEQ: case NODE_LT: case NODE_GT: case NODE_LE: // Bitwise ops. case NODE_BITAND: case NODE_BITOR: case NODE_BITXOR: case NODE_BITLSHIFT: case NODE_BITRSHIFT: // Arithmetic. 
case NODE_GE: case NODE_ADD: case NODE_SUB: case NODE_MUL: case NODE_DIV: case NODE_MOD: return compile_binary(compiler, chunk, node); break; case NODE_TRUE: case NODE_FALSE: case NODE_NUM_FLOAT: case NODE_NUM_UINT: case NODE_NUM_INT: { sz value = node->value.i; sz const_idx = add_constant(chunk, value); return (CompResult){ .type = COMP_CONST, .idx = const_idx, }; } break; case NODE_STRING: { Str string = node->value.str; sz str_idx = add_string(chunk, string); return (CompResult){ .type = COMP_STRING, .idx = str_idx, }; } break; case NODE_LET: return compile_let(compiler, chunk, node); case NODE_SET: return compile_set(compiler, chunk, node); case NODE_SYMBOL: return compile_symbol(compiler, chunk, node); case NODE_PTR: { Str name = node->t.next->unique_name; SymbolMap *map = symmap_lookup(&compiler->symbols, name); sz idx = map->val.var.idx; sz op_ldaddr = map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR; sz reg_addr = chunk->reg_idx++; emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk); return (CompResult){.type = COMP_REG, .idx = reg_addr}; } break; case NODE_DEREF: { Str type_name = node->type; SymbolMap *map = symmap_lookup(&compiler->symbols, type_name); sz type_size = map->val.t.element_size; Node *next = node->deref.next; CompResult res = compile_symbol(compiler, chunk, node); sz reg_dst = res.idx; sz reg_src = res.idx; while (next) { reg_dst = chunk->reg_idx++; if (next->kind == NODE_SYMBOL) { break; } emit_op(OP_LD64K, reg_dst, reg_src, 0, node, chunk); reg_src = reg_dst; next = next->deref.next; } if (!node->lvalue) { emit_sized_op(type_size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K, reg_dst, reg_src, 0, node, chunk); } else { emit_op(OP_LD64K, reg_dst, reg_src, 0, node, chunk); } return (CompResult){.type = COMP_REG, .idx = reg_dst}; } break; case NODE_INDEX: return compile_index(compiler, chunk, node); case NODE_BLOCK: { CompResult res; for (sz i = 0; i < array_size(node->elements); i++) { Node *root = node->elements[i]; res = compile_expr(compiler, chunk, root); if (root->kind == NODE_BREAK || root->kind == NODE_CONTINUE || root->kind == NODE_RETURN) { break; } } return res; } break; case NODE_STRUCT: case NODE_NIL: return (CompResult){.type = COMP_NIL}; default: { eprintln("error: compilation not implemented for node %s", node_str[node->kind]); emit_compile_err(compiler, chunk, node); return (CompResult){.type = COMP_ERR}; } break; } return (CompResult){.type = COMP_ERR}; } void disassemble_instruction(Chunk chunk, Instruction instruction) { print("%s", op_str[instruction.op]); for (sz i = 0; i < 11 - op_str[instruction.op].size; i++) { print(" "); } switch (instruction.op) { case OP_RECUR: case OP_CALL: println("f%d", instruction.dst, instruction.a, instruction.b); break; case OP_LDFUNPTR: println("r%d, f%d", instruction.dst, instruction.a, instruction.b); break; case OP_MOV8: case OP_MOV16: case OP_MOV32: case OP_MOV64: case OP_LD8K: case OP_LD16K: case OP_LD32K: case OP_LD64K: case OP_ST8K: case OP_ST16K: case OP_ST32K: case OP_ST64K: println("r%d, r%d", instruction.dst, instruction.a, instruction.b); break; case OP_JMPF: case OP_JMPT: println("l%d, r%d", instruction.dst, instruction.a, instruction.b); break; case OP_MOV8I: case OP_MOV16I: case OP_MOV32I: case OP_MOV64I: case OP_LDCONST: if (array_size(chunk.constants) != 0) { println("r%d, c%d (%x)", instruction.dst, instruction.a, chunk.constants[instruction.a].u); } else { println("r%d, c%d", instruction.dst, instruction.a); } break; case OP_LD8I: case OP_LD16I: case OP_LD32I: case OP_LD64I: case OP_ST8I: case 
OP_ST16I: case OP_ST32I: case OP_ST64I: case OP_ADDI: case OP_SUBI: case OP_MULI: case OP_DIVI: case OP_MODI: case OP_ADDFI: case OP_SUBFI: case OP_MULFI: case OP_DIVFI: case OP_MODFI: case OP_EQI: case OP_NEQI: case OP_LTI: case OP_GTI: case OP_LEI: case OP_GEI: case OP_ANDI: case OP_ORI: case OP_BITLSHIFTI: case OP_BITRSHIFTI: case OP_BITANDI: case OP_BITORI: case OP_BITXORI: case OP_MEMCPYI: if (array_size(chunk.constants) != 0) { println("r%d, r%d, c%d (%x)", instruction.dst, instruction.a, instruction.b, chunk.constants[instruction.b].u); } else { println("r%d, r%d, c%d", instruction.dst, instruction.a, instruction.b); } break; case OP_LD8: case OP_LD16: case OP_LD32: case OP_LD64: case OP_ST8: case OP_ST16: case OP_ST32: case OP_ST64: case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD: case OP_ADDF: case OP_SUBF: case OP_MULF: case OP_DIVF: case OP_MODF: case OP_EQ: case OP_NEQ: case OP_LT: case OP_GT: case OP_LE: case OP_GE: case OP_AND: case OP_OR: case OP_BITLSHIFT: case OP_BITRSHIFT: case OP_BITAND: case OP_BITOR: case OP_BITXOR: case OP_MEMCPY: println("r%d, r%d, r%d", instruction.dst, instruction.a, instruction.b); break; case OP_LDGVAR: case OP_LDGADDR: case OP_LDLVAR: case OP_LDLADDR: println("r%d, v%d", instruction.dst, instruction.a, instruction.b); break; case OP_LDSTR: println("r%d, s%d", instruction.dst, instruction.a, instruction.b); break; case OP_STGVAR: case OP_STLVAR: println("v%d, r%d", instruction.dst, instruction.a, instruction.b); break; case OP_STGVARI: case OP_STLVARI: if (array_size(chunk.constants) != 0) { println("v%d, c%d (%x)", instruction.dst, instruction.a, chunk.constants[instruction.a].u); } else { println("v%d, c%d", instruction.dst, instruction.a); } break; case OP_BITNOTI: case OP_NOTI: if (array_size(chunk.constants) != 0) { println("r%d, c%d (%x)", instruction.dst, instruction.a, chunk.constants[instruction.a].u); } else { println("r%d, c%d", instruction.dst, instruction.a); } break; case OP_BITNOT: case OP_NOT: println("r%d, r%d", instruction.dst, instruction.a, instruction.b); break; case OP_JMP: println("l%d", instruction.dst, instruction.a, instruction.b); break; case OP_JMPFI: case OP_JMPTI: if (array_size(chunk.constants) != 0) { println("l%d, c%d (%x)", instruction.dst, instruction.a, chunk.constants[instruction.a].u); } else { println("l%d, c%d", instruction.dst, instruction.a); } break; case OP_PRINTS8: case OP_PRINTS16: case OP_PRINTS32: case OP_PRINTS64: case OP_PRINTU8: case OP_PRINTU16: case OP_PRINTU32: case OP_PRINTU64: case OP_PRINTF32: case OP_PRINTF64: case OP_PRINTSTR: case OP_PRINTBOOL: case OP_PUSH: case OP_POP: case OP_PUTRET: println("r%d", instruction.dst, instruction.a, instruction.b); break; case OP_PRINTSTRI: case OP_PRINTS8I: case OP_PRINTS16I: case OP_PRINTS32I: case OP_PRINTS64I: case OP_PRINTU8I: case OP_PRINTU16I: case OP_PRINTU32I: case OP_PRINTU64I: case OP_PRINTF32I: case OP_PRINTF64I: case OP_PRINTBOOLI: case OP_RESERVE: case OP_PUSHI: case OP_PUTRETI: if (array_size(chunk.constants) != 0) { println("c%d (%x)", instruction.dst, chunk.constants[instruction.dst].u); } else { println("c%d", instruction.dst); } break; case OP_RET: case OP_RECUR_SELF: case OP_HALT: println(""); break; default: println("Unknown opcode %d", instruction.op); break; } } void disassemble_chunk(Chunk chunk) { println("═════════════════════════════════════════════════════"); println("CHUNK %d: %s%s", chunk.id, chunk.file_name, chunk.name); println("n_regs: %d, n_vars: %d, n_strings: %d, n_consts: %d", chunk.reg_idx, 
chunk.fun.n_vars, chunk.str_idx, chunk.const_idx); println("═══════════════════════ code ════════════════════════"); println(" LINE:COL INUM LABELS OP OPERANDS"); println("─────────────────────────────────────────────────────"); for (sz i = 0; i < array_size(chunk.code); i++) { printf(" %.4ld:%.4ld %.4lx ", chunk.linecol[i].line, chunk.linecol[i].col, i); IntIntMapIter lab_it = intintmap_iterator(chunk.labels, chunk.storage); IntIntMap *m = intintmap_next(&lab_it, chunk.storage); Str labs = cstr(""); while (m) { if (m->val == i) { labs = str_concat(labs, cstr(".L"), chunk.storage); labs = str_concat(labs, str_from_int(m->key, chunk.storage), chunk.storage); break; } m = intintmap_next(&lab_it, chunk.storage); } if (labs.size) { print("%s", labs); for (sz i = 0; i < 7 - labs.size; i++) { print(" "); } } else { printf(" "); } disassemble_instruction(chunk, chunk.code[i]); } if (array_size(chunk.constants) > 0) { println("═════════════════════ constants ═════════════════════"); for (sz i = 0; i < array_size(chunk.constants); i++) { println(" %x{2}: %x{8}", i, chunk.constants[i]); } } if (array_size(chunk.strings) > 0) { println("══════════════════════ strings ══════════════════════"); for (sz i = 0; i < array_size(chunk.strings); i++) { println(" %x{2}: %s", i, chunk.strings[i]); } } if (chunk.fun.n_vars > 0) { println("═════════════════════ variables ═════════════════════"); Variable *vars = chunk.fun.vars; for (sz i = 0; i < chunk.fun.n_vars; i++) { println(" %x{2}: [%x{4}:%x{4}] %s: %s", i, vars[i].offset, vars[i].offset + vars[i].size, vars[i].name, vars[i].type.id); } } } void bytecode_compiler(Compiler *compiler, Analyzer analyzer) { // Load quicklook tables.. compiler->numeric_types = analyzer.numeric_types; compiler->integer_types = analyzer.integer_types; compiler->uint_types = analyzer.uint_types; compiler->sint_types = analyzer.sint_types; compiler->float_types = analyzer.float_types; // Flatten the symbol table into a single map. for (sz i = 0; i < array_size(analyzer.scopes); i++) { Arena scratch = *analyzer.storage; Scope *scope = analyzer.scopes[i]; SymbolMapIter iter = symmap_iterator(scope->symbols, &scratch); SymbolMap *m = symmap_next(&iter, &scratch); while (m) { Symbol sym = m->val; symmap_insert(&compiler->symbols, sym.id, sym, compiler->storage); m = symmap_next(&iter, &scratch); } } // Initialize chunks for all functions. array_zero(compiler->chunks, analyzer.n_funcs, compiler->storage); array_head(compiler->chunks)->size = analyzer.n_funcs; FunMapIter iter = funmap_iterator(analyzer.fun_map, compiler->storage); FunMap *m = funmap_next(&iter, compiler->storage); while (m) { Function fun = m->val; compiler->chunks[fun.index] = chunk_alloc(compiler); compiler->chunks[fun.index]->id = fun.index; compiler->chunks[fun.index]->fun = fun; compiler->chunks[fun.index]->name = fun.id; m = funmap_next(&iter, compiler->storage); } // Compile all the chunks. for (sz i = 0; i < analyzer.n_funcs; i++) { Chunk *chunk = compiler->chunks[i]; CompResult res = compile_expr(compiler, chunk, chunk->fun.body); if (i == 0) { // Make sure the last result is on r0. 
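            // Chunk 0 (presumably the file's top-level code) ends in a HALT:
            // dst holds the result register and `a` flags whether a result
            // exists. Every other chunk falls through to PUTRET/RET below.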
sz res_reg = 0; bool is_nil = false; switch (res.type) { case COMP_CONST: { res_reg = chunk->reg_idx++; Instruction inst = (Instruction){ .op = OP_LDCONST, .dst = res_reg, .a = res.idx}; array_push(chunk->code, inst, chunk->storage); } break; case COMP_REG: { res_reg = res.idx; } break; case COMP_NIL: { is_nil = true; } break; default: break; } emit_op(OP_HALT, res_reg, !is_nil, 0, NULL, chunk); } else { // Put return values into memory. switch (res.type) { case COMP_CONST: { emit_op(OP_PUTRETI, res.idx, 0, 0, chunk->fun.body, chunk); } break; case COMP_REG: { emit_op(OP_PUTRET, res.idx, 0, 0, chunk->fun.body, chunk); } break; default: break; } emit_op(OP_RET, 0, 0, 0, chunk->fun.body, chunk); } verify_chunk(chunk); } } #endif // COMPILER_C