#ifndef BDL_COMPILER_H #define BDL_COMPILER_H #define PRELUDE_FILE "src/x86_64/prelude.asm" #define POSTLUDE_FILE "src/x86_64/postlude.asm" #define HEAP_SIZE MB(32) typedef struct Constant { Object *obj; char *label; } Constant; static Constant *constants = NULL; static char **labels = NULL; static char **procedures = NULL; static char* current_context = NULL; #define context_printf(fmt, ...) \ do { \ char buf[KB(4)]; \ int n_chars = sprintf(buf, fmt, ##__VA_ARGS__); \ array_insert(current_context, buf, n_chars); \ } while(false); // TODO: Separate c/h files // TODO: Create a "driver.c" file with the (display) function for external // linkage or assembly inlining. // Immediate constants. #define NIL_VAL 47LU #define BOOL_MASK 127LU #define BOOL_TAG 31LU #define BOOL_SHIFT 7LU #define TRUE_VAL ((1 << BOOL_SHIFT) | BOOL_TAG) #define FALSE_VAL ((0 << BOOL_SHIFT) | BOOL_TAG) #define FIXNUM_MASK 3LU #define FIXNUM_TAG 0LU #define FIXNUM_SHIFT 2LU // Heap allocated objects. #define PTR_MASK ~7LU #define STRING_MASK 7LU #define STRING_TAG 3LU #define PAIR_MASK 7LU #define PAIR_TAG 1LU #define LAMBDA_MASK 7LU #define LAMBDA_TAG 6LU void compile_object(Object *obj); void compile_fixnum(Object *obj); void compile_proc_call(Object *obj); void compile(Root *roots); char * generate_label(char *prefix) { static size_t label_counter = 0; char buf[32]; sprintf(buf, "%s%zu", prefix, label_counter++); size_t len = strlen(buf); char * ret = malloc(len + 1); memcpy(ret, buf, len); ret[len] = 0; array_push(labels, ret); return ret; } void emit_file(char *file_name) { FILE *file = fopen(file_name, "r"); if (!file) { fprintf(stderr, "error: couldn't open input file: %s\n", file_name); exit(EXIT_FAILURE); } char buf[1024]; size_t n = 0; while ((n = fread(&buf, 1, 1024, file)) > 0) { fwrite(buf, 1, n, stdout); } } void compile_fixnum(Object *obj) { context_printf(" ;; --> compile_fixnum\n"); context_printf(" mov rax, %zu\n", (obj->fixnum << FIXNUM_SHIFT) | FIXNUM_TAG); context_printf(" push rax\n"); context_printf(" ;; <-- compile_fixnum\n"); } void compile_boolean(Object *obj) { context_printf(" ;; --> compile_boolean\n"); int is_true = obj->type == OBJ_TYPE_TRUE; context_printf(" mov rax, %zu\n", (is_true << BOOL_SHIFT) | BOOL_TAG); context_printf(" push rax\n"); context_printf(" ;; <-- compile_boolean\n"); } void compile_nil(void) { context_printf(" ;; --> compile_nil\n"); context_printf(" mov rax, NIL_VAL\n"); context_printf(" push rax\n"); context_printf(" ;; <-- compile_nil\n"); } typedef enum OpType { // Arithmetic. OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, // Type predicates. OP_IS_NIL, OP_IS_ZERO, OP_IS_BOOL, OP_IS_FIXNUM, // Logic operations. OP_EQUAL, OP_GREATER, OP_LESS, OP_GREATER_EQ, OP_LESS_EQ, } OpType; void compile_type_predicate(OpType op, Object* args) { context_printf(" ;; --> compile_type_predicate\n"); compile_object(args->head); context_printf(" pop rax\n"); switch (op) { case OP_IS_NIL: { context_printf(" cmp rax, NIL_VAL\n"); } break; case OP_IS_ZERO: { context_printf(" cmp rax, 0\n"); } break; case OP_IS_BOOL: { context_printf(" and rax, BOOL_MASK\n"); context_printf(" cmp rax, BOOL_TAG\n"); } break; case OP_IS_FIXNUM: { context_printf(" and rax, FIXNUM_MASK\n"); context_printf(" cmp rax, FIXNUM_TAG\n"); } break; default: break; } context_printf(" mov rax, 0\n"); context_printf(" sete al\n"); context_printf(" shl rax, BOOL_SHIFT\n"); context_printf(" or rax, BOOL_TAG\n"); context_printf(" push rax\n"); context_printf(" ;; <-- compile_type_predicate\n"); } void compile_not(Object* args) { context_printf(" ;; --> compile_not\n"); compile_object(args->head); context_printf(" pop rax\n"); context_printf(" cmp rax, FALSE_VAL\n"); context_printf(" mov rax, 0\n"); context_printf(" sete al\n"); context_printf(" shl rax, BOOL_SHIFT\n"); context_printf(" or rax, BOOL_TAG\n"); context_printf(" push rax\n"); context_printf(" ;; <-- compile_not\n"); } void compile_and(Object *args) { context_printf(" ;; --> compile_and\n"); char *lab_false = generate_label("BDLL"); char *lab_exit = generate_label("BDLL"); while (args != NULL) { compile_object(args->head); args = args->tail; context_printf(" pop rax\n"); context_printf(" cmp rax, FALSE_VAL\n"); context_printf(" je %s\n", lab_false); } context_printf(" mov rax, TRUE_VAL\n"); context_printf(" push rax\n"); context_printf(" jmp %s\n", lab_exit); context_printf("%s:\n", lab_false); context_printf(" mov rax, FALSE_VAL\n"); context_printf(" push rax\n"); context_printf("%s:\n", lab_exit); context_printf(" ;; <-- compile_and\n"); } void compile_or(Object *args) { context_printf(" ;; --> compile_or\n"); char *lab_true = generate_label("BDLL"); char *lab_exit = generate_label("BDLL"); while (args != NULL) { compile_object(args->head); args = args->tail; context_printf(" pop rax\n"); context_printf(" cmp rax, FALSE_VAL\n"); context_printf(" jne %s\n", lab_true); } context_printf(" mov rax, FALSE_VAL\n"); context_printf(" push rax\n"); context_printf(" jmp %s\n", lab_exit); context_printf("%s:\n", lab_true); context_printf(" mov rax, TRUE_VAL\n"); context_printf(" push rax\n"); context_printf("%s:\n", lab_exit); context_printf(" ;; <-- compile_or\n"); } void compile_cmp_list(OpType op, Object* args) { context_printf(" ;; --> compile_cmp_list\n"); compile_object(args->head); char *lab_false = generate_label("BDLL"); char *lab_exit = generate_label("BDLL"); args = args->tail; while (args != NULL) { compile_object(args->head); args = args->tail; // Current value. context_printf(" pop rcx\n"); // Previous value. context_printf(" pop rax\n"); // Comparison. context_printf(" cmp rax, rcx\n"); switch (op) { case OP_EQUAL: { context_printf(" jne %s\n", lab_false); } break; case OP_GREATER: { context_printf(" jle %s\n", lab_false); } break; case OP_LESS: { context_printf(" jge %s\n", lab_false); } break; case OP_GREATER_EQ: { context_printf(" jl %s\n", lab_false); } break; case OP_LESS_EQ: { context_printf(" jg %s\n", lab_false); } break; default: break; } context_printf(" push rcx\n"); } context_printf(" pop rcx\n"); context_printf(" mov rax, TRUE_VAL\n"); context_printf(" push rax\n"); context_printf(" jmp %s\n", lab_exit); context_printf("%s:\n", lab_false); context_printf(" mov rax, FALSE_VAL\n"); context_printf(" push rax\n"); context_printf("%s:\n", lab_exit); context_printf(" ;; <-- compile_cmp_list\n"); } void compile_arithmetic_list(OpType op, Object* args) { context_printf(" ;; --> compile_arithmetic\n"); compile_object(args->head); args = args->tail; while (args != NULL) { compile_object(args->head); args = args->tail; context_printf(" pop rcx\n"); context_printf(" pop rax\n"); switch (op) { case OP_ADD: { context_printf(" add rax, rcx\n"); } break; case OP_SUB: { context_printf(" sub rax, rcx\n"); } break; case OP_MUL: { context_printf(" sar rax, FIXNUM_SHIFT\n"); context_printf(" sar rcx, FIXNUM_SHIFT\n"); context_printf(" mul rcx\n"); context_printf(" shl rax, FIXNUM_SHIFT\n"); } break; case OP_DIV: { context_printf(" sar rax, FIXNUM_SHIFT\n"); context_printf(" sar rcx, FIXNUM_SHIFT\n"); context_printf(" mov rdx, 0\n"); context_printf(" div rcx\n"); context_printf(" shl rax, FIXNUM_SHIFT\n"); } break; case OP_MOD: { context_printf(" sar rax, FIXNUM_SHIFT\n"); context_printf(" sar rcx, FIXNUM_SHIFT\n"); context_printf(" mov rdx, 0\n"); context_printf(" div rcx\n"); context_printf(" mov rax, rdx\n"); context_printf(" shl rax, FIXNUM_SHIFT\n"); } break; default: break; } context_printf(" push rax\n"); } context_printf(" ;; <-- compile_arithmetic\n"); } void compile_cons(Object *obj) { context_printf(" ;; --> compile_cons\n"); // Store objects into the car and cdr. compile_object(obj->head); compile_object(obj->tail->head); context_printf(" pop rdx\n"); context_printf(" pop rax\n"); context_printf(" mov [r15], rax\n"); context_printf(" mov [r15 + 8], rdx\n"); // Push memory address of cons cell. context_printf(" mov rax, r15\n"); context_printf(" or rax, %zu\n", PAIR_TAG); context_printf(" push rax\n"); // Bump allocation register. context_printf(" add r15, 16\n"); context_printf(" ;; <-- compile_cons\n"); } void compile_car(Object *obj) { context_printf(" ;; --> compile_car\n"); compile_object(obj->head); context_printf(" pop rax\n"); context_printf(" and rax, %zu\n", ~PAIR_MASK); context_printf(" mov rdx, [rax]\n"); context_printf(" push rdx\n"); context_printf(" ;; <-- compile_car\n"); } void compile_cdr(Object *obj) { context_printf(" ;; --> compile_cdr\n"); compile_object(obj->head); context_printf(" pop rax\n"); context_printf(" and rax, %zu\n", ~PAIR_MASK); context_printf(" mov rdx, [rax + 8]\n"); context_printf(" push rdx\n"); context_printf(" ;; <-- compile_cdr\n"); } void compile_proc_call(Object *obj) { // TODO: Probably we want to use a hash table for these lookups that is // initialized at the start of the compilation procedure. if (sv_equal(&obj->head->text, &STRING("+"))) { compile_arithmetic_list(OP_ADD, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("-"))) { compile_arithmetic_list(OP_SUB, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("*"))) { compile_arithmetic_list(OP_MUL, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("/"))) { compile_arithmetic_list(OP_DIV, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("%"))) { compile_arithmetic_list(OP_MOD, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("nil?"))) { compile_type_predicate(OP_IS_NIL, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("zero?"))) { compile_type_predicate(OP_IS_ZERO, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("fixnum?"))) { compile_type_predicate(OP_IS_FIXNUM, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("bool?"))) { compile_type_predicate(OP_IS_BOOL, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("display"))) { compile_object(obj->tail->head); context_printf(" pop rdi\n"); context_printf(" call display\n"); compile_nil(); } else if (sv_equal(&obj->head->text, &STRING("not"))) { compile_not(obj->tail); } else if (sv_equal(&obj->head->text, &STRING("and"))) { compile_and(obj->tail); } else if (sv_equal(&obj->head->text, &STRING("or"))) { compile_or(obj->tail); } else if (sv_equal(&obj->head->text, &STRING("="))) { compile_cmp_list(OP_EQUAL, obj->tail); } else if (sv_equal(&obj->head->text, &STRING(">"))) { compile_cmp_list(OP_GREATER, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("<"))) { compile_cmp_list(OP_LESS, obj->tail); } else if (sv_equal(&obj->head->text, &STRING(">="))) { compile_cmp_list(OP_GREATER_EQ, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("<="))) { compile_cmp_list(OP_LESS_EQ, obj->tail); } else if (sv_equal(&obj->head->text, &STRING("cons"))) { compile_cons(obj->tail); } else if (sv_equal(&obj->head->text, &STRING("car"))) { compile_car(obj->tail); } else if (sv_equal(&obj->head->text, &STRING("cdr"))) { compile_cdr(obj->tail); } else { compile_object(obj->head); size_t n_args = 0; while (obj->tail != NULL) { obj = obj->tail; compile_object(obj->head); n_args++; } context_printf(" mov rax, [rsp + %zu]\n", 8 * n_args); context_printf(" mov rcx, PTR_MASK\n"); context_printf(" and rcx, rax\n"); context_printf(" mov rax, [rcx]\n"); context_printf(" call rax\n"); context_printf(" push rax\n"); } } void compile_if(Object *obj) { context_printf(" ;; --> compile_if\n"); char *lab_false = generate_label("BDLL"); compile_object(obj->condition); context_printf(" pop rax\n"); context_printf(" cmp rax, FALSE_VAL\n"); context_printf(" je %s\n", lab_false); compile_object(obj->expr_true); if (obj->expr_false != NULL) { char *lab_exit = generate_label("BDLL"); context_printf(" jmp %s\n", lab_exit); context_printf("%s:\n", lab_false); compile_object(obj->expr_false); context_printf("%s:\n", lab_exit); } else { context_printf("%s:\n", lab_false); } context_printf(" ;; <-- compile_if\n"); } void compile_string(Object *obj) { context_printf(" ;; --> compile_string\n"); Constant c; // Check if the string is already stored as a constant. ssize_t idx = -1; for (size_t i = 0; i < array_size(constants); i++) { c = constants[i]; if (object_equal(c.obj, obj)) { idx = i; break; } } if (idx < 0) { idx = array_size(constants); c = (Constant){ .obj = obj, .label = generate_label("BDLC"), }; array_push(constants, c); } // Create a tagged pointer to the label. context_printf(" mov rax, %s\n", c.label); context_printf(" or rax, STRING_TAG\n"); context_printf(" push rax\n"); context_printf(" ;; <-- compile_string\n"); } void compile_lambda(Object *obj) { context_printf(" ;; --> compile_lambda\n"); // Create a new compilation context. char *prev_context = current_context; current_context = NULL; array_init(current_context, 0); char *name = generate_label("BDLP"); context_printf("alignb 8\n"); context_printf("%s:\n", name); // Initialize function call frame. context_printf(" push rbp\n"); context_printf(" mov rbp, rsp\n"); // Procedure body. for (size_t i = 0; i < array_size(obj->body); i++) { compile_object(obj->body[i]); } // Return is stored in the `rax`. context_printf(" pop rax\n"); // Restore the previous call frame. context_printf(" mov rsp, rbp\n"); context_printf(" pop rbp\n"); context_printf(" ret\n"); context_printf("\n"); // Restore previous compilation context. array_push(procedures, current_context); current_context = prev_context; // Create tagged pointer with this lambda procedure. context_printf(" mov rax, %s\n", name); context_printf(" mov [r15], rax\n"); context_printf(" mov rax, r15\n"); context_printf(" or rax, %zu\n", LAMBDA_TAG); // Push compiled object to the stack. context_printf(" push rax\n"); // TODO: When we add closed variables they will be stored here, for now just // incrementing by a 64 bit pointer size, as that is what we are storing. context_printf(" add r15, 8\n"); context_printf(" ;; <-- compile_lambda\n"); } void compile_object(Object *obj) { switch (obj->type) { case OBJ_TYPE_NIL: { compile_nil(); } break; case OBJ_TYPE_TRUE: case OBJ_TYPE_FALSE: { compile_boolean(obj); } break; case OBJ_TYPE_FIXNUM: { compile_fixnum(obj); } break; case OBJ_TYPE_PAIR: { compile_proc_call(obj); } break; case OBJ_TYPE_STRING: { compile_string(obj); } break; case OBJ_TYPE_IF: { compile_if(obj); } break; case OBJ_TYPE_LAMBDA: { compile_lambda(obj); } break; default: break; } } void emit_bss_section(void) { printf("section .bss\n"); printf("bdl_heap:\n"); printf(" resb HEAP_SIZE\n"); printf("\n"); } void emit_data_section(void) { printf("section .data\n"); printf("true_str:\n db \"true\", 10\n"); printf(" alignb 8\n"); printf("false_str:\n db \"false\", 10\n"); printf(" alignb 8\n"); printf("lambda_str:\n"); char lambda_str[] = "#{lambda}"; printf(" dq %ld\n", sizeof(lambda_str)); printf(" db \"%s\", 10\n", lambda_str); printf(" alignb 8\n"); for (size_t i = 0; i < array_size(constants); i++) { // NOTE: Only supporting string constants for now. Constant c = constants[i]; int n = c.obj->text.n; // TODO: escape characters maybe? // TODO: quote all strings maybe? printf("%s:\n", c.label); printf(" dq %d\n", n + 1); printf(" db \"%.*s\", 10\n", n, c.obj->text.start); printf(" alignb 8\n"); } printf("\n"); } void compile(Root *roots) { // Prepare compilation variables. array_init(constants, 0); array_init(labels, 0); array_init(procedures, 0); array_init(current_context, 0); // Compile program. for (size_t i = 0; i < array_size(roots); i++) { Object *root = roots[i]; compile_object(root); } // Base defines. printf("%%define NIL_VAL %zu\n", NIL_VAL); printf("%%define TRUE_VAL %zu\n", TRUE_VAL); printf("%%define FALSE_VAL %zu\n", FALSE_VAL); printf("%%define BOOL_MASK %zu\n", BOOL_MASK); printf("%%define BOOL_TAG %zu\n", BOOL_TAG); printf("%%define BOOL_SHIFT %zu\n", BOOL_SHIFT); printf("%%define FIXNUM_MASK %zu\n", FIXNUM_MASK); printf("%%define FIXNUM_TAG %zu\n", FIXNUM_TAG); printf("%%define FIXNUM_SHIFT %zu\n", FIXNUM_SHIFT); printf("%%define PAIR_MASK %zu\n", PAIR_MASK); printf("%%define PAIR_TAG %zu\n", PAIR_TAG); printf("%%define PTR_MASK %zu\n", PTR_MASK); printf("%%define STRING_MASK %zu\n", STRING_MASK); printf("%%define STRING_TAG %zu\n", STRING_TAG); printf("%%define LAMBDA_MASK %zu\n", LAMBDA_MASK); printf("%%define LAMBDA_TAG %zu\n", LAMBDA_TAG); printf("%%define HEAP_SIZE %zu\n", HEAP_SIZE); printf("\n"); // Prelude. emit_file(PRELUDE_FILE); printf("\n"); // Function definitions. for (size_t i = 0; i < array_size(procedures); i++) { char *ctx = procedures[i]; for (size_t i = 0; i < array_size(ctx); i++) { putchar(ctx[i]); } } // Main context. printf("alignb 8\n"); printf("global _start\n"); printf("_start:\n"); printf(" mov r15, bdl_heap\n"); printf(" push NIL_VAL\n"); for (size_t i = 0; i < array_size(current_context); i++) { putchar(current_context[i]); } // Postlude. emit_file(POSTLUDE_FILE); emit_data_section(); emit_bss_section(); // Clean resources. array_free(constants); for (size_t i = 0; i < array_size(labels); i++) { free(labels[i]); } array_free(labels); for (size_t i = 0; i < array_size(procedures); i++) { array_free(procedures[i]); } array_free(procedures); } #endif // BDL_COMPILER_H