#ifndef BDL_IR_H #define BDL_IR_H typedef struct LineInfo { size_t line; size_t col; } LineInfo; typedef enum Op { // Arithmetic ops. // - Binary operations. // - Arguments are passed via the stack. // - Consume two items in the stack. OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, // Logic ops. OP_NOT, // Stack ops. // - Requires a constant Object to push into the stack. OP_PUSH, // - Discards the last value in the stack. OP_DROP, // - Duplicates the last value in the stack. OP_DUP, // - Rotates the last three elements in the stack. // - Right: [ a b c -> c a b ] // - Left: [ a b c -> b c a] OP_ROT_RIGHT, OP_ROT_LEFT, // A label for memory access. // - The argument should be a unique value. OP_LABEL, // Jump/conditional ops. // - Take a label as argument. OP_JUMP, // - Consume one value in the stack. // - All objects except `false` are considered `true`. OP_JUMP_IF_TRUE, OP_JUMP_IF_FALSE, // - These require numerical objects on the stack. // - Consume two items in the stack. // - Jumps to the given label as argument when appropriate. OP_JUMP_IF_EQ, OP_JUMP_IF_NEQ, OP_JUMP_IF_GT, OP_JUMP_IF_LT, OP_JUMP_IF_GE, OP_JUMP_IF_LE, // Variable access. // - Require an index corresponding to the variable to store. // - Consume the last item in the stack. OP_STORE_LOCAL, OP_STORE_CAPTURED, OP_STORE_PARAM, // - Require an index corresponding to the variable to load. // - The loaded value is pushed into the stack. OP_LOAD_LOCAL, OP_LOAD_CAPTURED, OP_LOAD_PARAM, // Primitive complex commands. // - Prints the last object in the stack. OP_PRINT, // Procedures. // - Consumes the last value in the stack, which must be a lambda. OP_CALL, // - Return position is on a know location of the stack based on the offset // of locals, parameters, etc. OP_RETURN, // TODO: add remaining ops. } Op; static const char* ops_str[] = { [OP_ADD] = "OP_ADD", [OP_SUB] = "OP_SUB", [OP_MUL] = "OP_MUL", [OP_DIV] = "OP_DIV", [OP_MOD] = "OP_MOD", [OP_NOT] = "OP_NOT", [OP_PUSH] = "OP_PUSH", [OP_DROP] = "OP_DROP", [OP_DUP] = "OP_DUP", [OP_ROT_RIGHT] = "OP_ROT_RIGHT", [OP_ROT_LEFT] = "OP_ROT_LEFT", [OP_LABEL] = "OP_LABEL", [OP_JUMP] = "OP_JUMP", [OP_JUMP_IF_TRUE] = "OP_JUMP_IF_TRUE", [OP_JUMP_IF_FALSE] = "OP_JUMP_IF_FALSE", [OP_JUMP_IF_EQ] = "OP_JUMP_IF_EQ", [OP_JUMP_IF_NEQ] = "OP_JUMP_IF_NEQ", [OP_JUMP_IF_GT] = "OP_JUMP_IF_GT", [OP_JUMP_IF_LT] = "OP_JUMP_IF_LT", [OP_JUMP_IF_GE] = "OP_JUMP_IF_GE", [OP_JUMP_IF_LE] = "OP_JUMP_IF_LE", [OP_STORE_LOCAL] = "OP_STORE_LOCAL", [OP_STORE_CAPTURED] = "OP_STORE_CAPTURED", [OP_STORE_PARAM] = "OP_STORE_PARAM", [OP_LOAD_LOCAL] = "OP_LOAD_LOCAL", [OP_LOAD_CAPTURED] = "OP_LOAD_CAPTURED", [OP_LOAD_PARAM] = "OP_LOAD_PARAM", [OP_PRINT] = "OP_PRINT", [OP_CALL] = "OP_CALL", [OP_RETURN] = "OP_RETURN", }; typedef struct Instruction { Op op; // Op arguments. union { // OP_PUSH Object *argument; // OP_LABEL // OP_JUMP // OP_JUMP_IF_xxx size_t label_id; // OP_STORE_LOCAL // OP_LOAD_LOCAL size_t index; }; // Original line/column for debugging purposes. size_t line; size_t col; } Instruction; typedef struct Procedure { // Procedure name. char *name; struct Procedure *parent; // Program code. Instruction *instructions; // Variables. Object **locals; Object **captured; Object **params; } Procedure; typedef struct ProgramIr { Procedure **procedures; Object **lambdas; size_t labels; } ProgramIr; #define INST_SIMPLE(PROC, OP, LINE, COL) \ do { \ Instruction inst = (Instruction){(OP), NULL, (LINE), (COL)}; \ array_push((PROC)->instructions, inst); \ } while(false); #define INST_ARG(PROC, OP, ARG, LINE, COL) \ do { \ Instruction inst = (Instruction){(OP), .argument = (ARG), (LINE), (COL)}; \ array_push((PROC)->instructions, inst); \ } while(false); #define INST_LABEL(PROC, OP, ARG, LINE, COL) \ do { \ Instruction inst = (Instruction){(OP), .label_id = (ARG), (LINE), (COL)}; \ array_push((PROC)->instructions, inst); \ } while(false); #define INST_VAR(PROC, OP, ARG, LINE, COL) \ do { \ Instruction inst = (Instruction){(OP), .index = (ARG), (LINE), (COL)}; \ array_push((PROC)->instructions, inst); \ } while(false); void print_instruction(Instruction *instruction) { printf("%4ld:%-4ld ", instruction->line, instruction->col); Op op = instruction->op; switch (op) { case OP_PUSH: { printf("%-16s -> ", ops_str[op]); OBJ_PRINT(instruction->argument); } break; case OP_JUMP: case OP_JUMP_IF_TRUE: case OP_JUMP_IF_FALSE: case OP_JUMP_IF_EQ: case OP_JUMP_IF_NEQ: case OP_JUMP_IF_GT: case OP_JUMP_IF_LT: case OP_JUMP_IF_GE: case OP_JUMP_IF_LE: case OP_LABEL: { printf("%-16s -> %zu\n", ops_str[op], instruction->label_id); } break; case OP_STORE_LOCAL: case OP_STORE_CAPTURED: case OP_STORE_PARAM: case OP_LOAD_LOCAL: case OP_LOAD_CAPTURED: case OP_LOAD_PARAM: { printf("%-16s -> %zu\n", ops_str[op], instruction->index); } break; default: { printf("%s\n", ops_str[op]); } break; } } void print_procedure(Procedure *proc) { printf("===== %.*s =====\n", (int)array_size(proc->name), proc->name); printf("code:\n"); for (size_t i = 0; i < array_size(proc->instructions); ++i) { print_instruction(&proc->instructions[i]); } } Procedure * proc_alloc(ProgramIr *program, StringView name, Procedure *parent) { Procedure *proc = calloc(1, sizeof(Procedure)); array_init(proc->name, name.n); array_insert(proc->name, name.start, name.n); array_init(proc->instructions, 0); array_init(proc->locals, 0); array_init(proc->captured, 0); array_init(proc->params, 0); proc->parent = parent; array_push(program->procedures, proc); return proc; } void compile_object(ProgramIr *program, Procedure *proc, Object *obj); void compile_arithmetic(ProgramIr *program, Procedure *proc, Op op, size_t line, size_t col, Object *args) { compile_object(program, proc, args->head); args = args->tail; while (args != NULL) { compile_object(program, proc, args->head); args = args->tail; INST_SIMPLE(proc, op, line, col); } } void compile_numeric_cmp(ProgramIr *program, Procedure *proc, Op op, size_t line, size_t col, Object *args) { size_t label_false = program->labels++; size_t label_exit = program->labels++; compile_object(program, proc, args->head); args = args->tail; while (args != NULL) { compile_object(program, proc, args->head); args = args->tail; INST_SIMPLE(proc, OP_DUP, line, col); INST_SIMPLE(proc, OP_ROT_RIGHT, line, col); INST_LABEL(proc, op, label_false, line, col); } INST_SIMPLE(proc, OP_DROP, line, col); INST_ARG(proc, OP_PUSH, &obj_true, line, col); INST_LABEL(proc, OP_JUMP, label_exit, line, col); INST_LABEL(proc, OP_LABEL, label_false, line, col); INST_SIMPLE(proc, OP_DROP, line, col); INST_ARG(proc, OP_PUSH, &obj_false, line, col); INST_LABEL(proc, OP_LABEL, label_exit, line, col); } void compile_print(ProgramIr *program, Procedure *proc, size_t line, size_t col, Object *args) { while (args != NULL) { compile_object(program, proc, args->head); args = args->tail; INST_SIMPLE(proc, OP_PRINT, line, col); } } void compile_not(ProgramIr *program, Procedure *proc, size_t line, size_t col, Object *args) { compile_object(program, proc, args->head); INST_SIMPLE(proc, OP_NOT, line, col); } void compile_and(ProgramIr *program, Procedure *proc, size_t line, size_t col, Object *args) { size_t label_false = program->labels++; size_t label_exit = program->labels++; while (args != NULL) { compile_object(program, proc, args->head); args = args->tail; INST_LABEL(proc, OP_JUMP_IF_FALSE, label_false, line, col); } INST_ARG(proc, OP_PUSH, &obj_true, line, col); INST_LABEL(proc, OP_JUMP, label_exit, line, col); INST_LABEL(proc, OP_LABEL, label_false, line, col); INST_ARG(proc, OP_PUSH, &obj_false, line, col); INST_LABEL(proc, OP_LABEL, label_exit, line, col); } void compile_or(ProgramIr *program, Procedure *proc, size_t line, size_t col, Object *args) { size_t label_true = program->labels++; size_t label_exit = program->labels++; while (args != NULL) { compile_object(program, proc, args->head); args = args->tail; INST_LABEL(proc, OP_JUMP_IF_TRUE, label_true, line, col); } INST_ARG(proc, OP_PUSH, &obj_false, line, col); INST_LABEL(proc, OP_JUMP, label_exit, line, col); INST_LABEL(proc, OP_LABEL, label_true, line, col); INST_ARG(proc, OP_PUSH, &obj_true, line, col); INST_LABEL(proc, OP_LABEL, label_exit, line, col); } void compile_builtin(ProgramIr *program, Procedure *proc, Object *obj) { size_t line = obj->line; size_t col = obj->col; switch (obj->head->builtin) { case BUILTIN_ADD: { compile_arithmetic(program, proc, OP_ADD, line, col, obj->tail); } break; case BUILTIN_SUB: { compile_arithmetic(program, proc, OP_SUB, line, col, obj->tail); } break; case BUILTIN_MUL: { compile_arithmetic(program, proc, OP_MUL, line, col, obj->tail); } break; case BUILTIN_DIV: { compile_arithmetic(program, proc, OP_DIV, line, col, obj->tail); } break; case BUILTIN_MOD: { compile_arithmetic(program, proc, OP_MOD, line, col, obj->tail); } break; case BUILTIN_PRINT: { compile_print(program, proc, line, col, obj->tail); } break; case BUILTIN_NOT: { compile_not(program, proc, line, col, obj->tail); } break; case BUILTIN_AND: { compile_and(program, proc, line, col, obj->tail); } break; case BUILTIN_OR: { compile_or(program, proc, line, col, obj->tail); } break; case BUILTIN_EQ: { compile_numeric_cmp(program, proc, OP_JUMP_IF_NEQ, line, col, obj->tail); } break; case BUILTIN_GT: { compile_numeric_cmp(program, proc, OP_JUMP_IF_LE, line, col, obj->tail); } break; case BUILTIN_LT: { compile_numeric_cmp(program, proc, OP_JUMP_IF_GE, line, col, obj->tail); } break; case BUILTIN_GE: { compile_numeric_cmp(program, proc, OP_JUMP_IF_LT, line, col, obj->tail); } break; case BUILTIN_LE: { compile_numeric_cmp(program, proc, OP_JUMP_IF_GT, line, col, obj->tail); } break; // TODO: cons, car, cdr, type checks (nil? zero? fixnum? bool? ...) default: { assert(false && "builtin not implemented"); } break; } } void compile_proc_call(ProgramIr *program, Procedure *proc, Object *obj) { if (IS_BUILTIN(obj->head)) { compile_builtin(program, proc, obj); } else { Object *tail = obj->tail; while (tail != NULL) { compile_object(program, proc, tail->head); tail = tail->tail; } compile_object(program, proc, obj->head); INST_SIMPLE(proc, OP_CALL, obj->line, obj->col); } } void compile_if(ProgramIr *program, Procedure *proc, Object *obj) { size_t label_false = program->labels++; compile_object(program, proc, obj->condition); INST_LABEL(proc, OP_JUMP_IF_FALSE, label_false, obj->line, obj->col); compile_object(program, proc, obj->expr_true); if (obj->expr_false != NULL) { size_t label_exit = program->labels++; INST_LABEL(proc, OP_JUMP, label_exit, obj->line, obj->col); INST_LABEL(proc, OP_LABEL, label_false, obj->line, obj->col); compile_object(program, proc, obj->expr_false); INST_LABEL(proc, OP_LABEL, label_exit, obj->line, obj->col); } else { INST_LABEL(proc, OP_LABEL, label_false, obj->line, obj->col); } } void compile_def(ProgramIr *program, Procedure *proc, Object *obj) { ssize_t idx = find_var_index(proc->locals, obj->var_name); if (idx == -1) { array_push(proc->locals, obj->var_name); idx = array_size(proc->locals) - 1; } compile_object(program, proc, obj->var_expr); INST_VAR(proc, OP_STORE_LOCAL, idx, obj->line, obj->col); } void compile_lambda(ProgramIr *program, Procedure *proc, Object *obj) { // NOTE: As an optimization, instead of storing and comparing lambdas, we // could calculate a checksum and only check equality in full if they // differ. We can also calculate the equality of Procedure instead of // lambdas. for (size_t i = 0; i < array_size(program->lambdas); ++i) { if (object_equal(program->lambdas[i], obj)) { INST_ARG(proc, OP_PUSH, obj, obj->line, obj->col); return; } } array_push(program->lambdas, obj); Procedure *lambda = proc_alloc(program, STRING("lambda"), proc); // Parameters. for (size_t i = 0; i < array_size(obj->params); ++i) { array_push(lambda->params, obj->params[i]); } // Body. for (size_t i = 0; i < array_size(obj->body) - 1; i++) { compile_object(program, lambda, obj->body[i]); } Object *last_expr = obj->body[array_size(obj->body) - 1]; // Tail Call Optimization. // TODO: also for if statements if (IS_PAIR(last_expr)) { if (IS_BUILTIN(last_expr->head)) { compile_builtin(program, lambda, last_expr); } else { // TODO: Discard the previous stack frame. // context_printf(" mov rsp, rbp\n"); // TODO: Replace stack frame instead of compiling a new one. compile_proc_call(program, lambda, last_expr); // size_t old_offset = n_locals + n_captured + n_params; // size_t new_offset = compile_call_body(last_expr); // context_printf(" mov rdi, [rbp - 8]\n"); // for (size_t i = 0; i < new_offset + 1; i++) { // context_printf(" mov rax, [rbp - 8 * %zu]\n", i + 1); // context_printf(" mov [rbp + 8 * %zu], rax\n", old_offset - i); // } // // Set the stack pointer at the end of given parameters. // context_printf(" mov rsp, rbp\n"); // ssize_t offset_diff = old_offset - new_offset; // if (offset_diff > 0) { // context_printf(" add rsp, 8 * %zu\n", offset_diff); // } else { // context_printf(" sub rsp, 8 * %zu\n", offset_diff); // } // context_printf(" jmp rdi\n"); } } else { // compile_nil(); compile_object(program, lambda, last_expr); // // Return is stored in the `rax`. // context_printf(" pop rax\n"); // // Restore the previous call frame. // size_t rp_offset = (n_locals + n_params + n_captured + 1); // context_printf(" mov rdi, [rbp + %zu]\n", 8 * rp_offset); // context_printf(" mov rsp, rbp\n"); // context_printf(" add rsp, %zu\n", 8 * n_locals); // context_printf(" jmp rdi\n"); } INST_SIMPLE(lambda, OP_RETURN, obj->line, obj->col); INST_ARG(proc, OP_PUSH, obj, obj->line, obj->col); } void compile_symbol(Procedure *proc, Object *obj) { ssize_t idx = -1; // Is a local variable? idx = find_var_index(proc->locals, obj); if (idx != -1) { INST_VAR(proc, OP_LOAD_LOCAL, idx, obj->line, obj->col); return; } // Is a captured variable? idx = find_var_index(proc->captured, obj); if (idx != -1) { INST_VAR(proc, OP_LOAD_CAPTURED, idx, obj->line, obj->col); return; } // Is a function parameter? idx = find_var_index(proc->params, obj); if (idx != -1) { INST_VAR(proc, OP_LOAD_PARAM, idx, obj->line, obj->col); return; } // Not in this scope, if is in another scope, it must be captured. Since we // perform semantic analysis before this should always be true in this // phase. array_push(proc->captured, obj); INST_VAR(proc, OP_LOAD_CAPTURED, array_size(proc->captured) - 1, obj->line, obj->col); } void compile_object(ProgramIr *program, Procedure *proc, Object *obj) { switch (obj->type) { case OBJ_TYPE_NIL: case OBJ_TYPE_TRUE: case OBJ_TYPE_FALSE: case OBJ_TYPE_STRING: case OBJ_TYPE_FIXNUM: { INST_ARG(proc, OP_PUSH, obj, obj->line, obj->col); } break; case OBJ_TYPE_PAIR: { compile_proc_call(program, proc, obj); } break; case OBJ_TYPE_IF: { compile_if(program, proc, obj); } break; case OBJ_TYPE_LAMBDA: { compile_lambda(program, proc, obj); } break; case OBJ_TYPE_DEF: { compile_def(program, proc, obj); } break; case OBJ_TYPE_SYMBOL: { compile_symbol(proc, obj); } break; default: { assert(false && "compile_object not implemented"); } break; } } ProgramIr compile(Program program) { ProgramIr program_ir = {0}; array_init(program_ir.procedures, 0); array_init(program_ir.lambdas, 0); Procedure *main = proc_alloc(&program_ir, STRING("main"), NULL); for (size_t i = 0; i < array_size(program.roots); i++) { Object *root = program.roots[i]; compile_object(&program_ir, main, root); } // DEBUG:... for (size_t i = 0; i < array_size(program_ir.procedures); ++i) { print_procedure(program_ir.procedures[i]); } return program_ir; } #endif // BDL_IR_H