From 701369bfb490596a7336415d01142fdee52d5415 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Tue, 9 Nov 2021 10:15:26 +0100 Subject: Add support for compiling string literals --- src/compiler.h | 130 ++++++++++++++++++++++++++++++++++++++++-------- src/x86_64/postlude.asm | 5 +- src/x86_64/prelude.asm | 23 +++++++-- 3 files changed, 128 insertions(+), 30 deletions(-) diff --git a/src/compiler.h b/src/compiler.h index 5e1bf7d..3227eba 100644 --- a/src/compiler.h +++ b/src/compiler.h @@ -1,12 +1,23 @@ #ifndef BDL_COMPILER_H #define BDL_COMPILER_H - #define PRELUDE_FILE "src/x86_64/prelude.asm" #define POSTLUDE_FILE "src/x86_64/postlude.asm" #define HEAP_SIZE MB(32) +typedef struct Constant { + Object *obj; + char *label; +} Constant; + +static Constant *constants = NULL; +static char **labels = NULL; + +// TODO: Separate c/h files +// TODO: Create a "driver.c" file with the (display) function for external +// linkage or assembly inlining. + // Immediate constants. #define NIL_VAL 47LU #define BOOL_MASK 127LU @@ -19,6 +30,7 @@ #define FIXNUM_SHIFT 2LU // Heap allocated objects. +#define STRING_INV_MASK ~7LU #define STRING_MASK 7LU #define STRING_TAG 3LU #define PAIR_MASK 7LU @@ -30,16 +42,15 @@ void compile_proc_call(Object *obj); void compile(Root *roots); char * -generate_label(void) { - // Generate a unique label allocated on the heap. The caller is responsible - // for freeing the memory. +generate_label(char *prefix) { static size_t label_counter = 0; char buf[32]; - sprintf(buf, ".BDLL%ld", label_counter++); + sprintf(buf, "%s%zu", prefix, label_counter++); size_t len = strlen(buf); char * ret = malloc(len + 1); memcpy(ret, buf, len); ret[len] = 0; + array_push(labels, ret); return ret; } @@ -60,7 +71,7 @@ emit_file(char *file_name) { void compile_fixnum(Object *obj) { printf(" ;; --> compile_fixnum\n"); - printf(" mov rax, %ld\n", (obj->fixnum << FIXNUM_SHIFT) | FIXNUM_TAG); + printf(" mov rax, %zu\n", (obj->fixnum << FIXNUM_SHIFT) | FIXNUM_TAG); printf(" push rax\n"); printf(" ;; <-- compile_fixnum\n"); } @@ -149,8 +160,8 @@ compile_not(Object* args) { void compile_and(Object *args) { printf(" ;; --> compile_and\n"); - char *lab_false = generate_label(); - char *lab_exit = generate_label(); + char *lab_false = generate_label("BDLL"); + char *lab_exit = generate_label("BDLL"); while (args != NULL) { compile_object(args->head); args = args->tail; @@ -165,16 +176,14 @@ compile_and(Object *args) { printf(" mov rax, FALSE_VAL\n"); printf(" push rax\n"); printf("%s:\n", lab_exit); - free(lab_false); - free(lab_exit); printf(" ;; <-- compile_and\n"); } void compile_or(Object *args) { printf(" ;; --> compile_or\n"); - char *lab_true = generate_label(); - char *lab_exit = generate_label(); + char *lab_true = generate_label("BDLL"); + char *lab_exit = generate_label("BDLL"); while (args != NULL) { compile_object(args->head); args = args->tail; @@ -189,8 +198,6 @@ compile_or(Object *args) { printf(" mov rax, TRUE_VAL\n"); printf(" push rax\n"); printf("%s:\n", lab_exit); - free(lab_true); - free(lab_exit); printf(" ;; <-- compile_or\n"); } @@ -198,8 +205,8 @@ void compile_cmp_list(OpType op, Object* args) { printf(" ;; --> compile_cmp_list\n"); compile_object(args->head); - char *lab_false = generate_label(); - char *lab_exit = generate_label(); + char *lab_false = generate_label("BDLL"); + char *lab_exit = generate_label("BDLL"); args = args->tail; while (args != NULL) { compile_object(args->head); @@ -231,8 +238,6 @@ compile_cmp_list(OpType op, Object* args) { printf(" mov rax, FALSE_VAL\n"); printf(" push rax\n"); printf("%s:\n", lab_exit); - free(lab_false); - free(lab_exit); printf(" ;; <-- compile_cmp_list\n"); } @@ -376,23 +381,51 @@ compile_proc_call(Object *obj) { void compile_if(Object *obj) { - char *lab_false = generate_label(); + char *lab_false = generate_label("BDLL"); compile_object(obj->condition); printf(" pop rax\n"); printf(" cmp rax, FALSE_VAL\n"); printf(" je %s\n", lab_false); compile_object(obj->expr_true); if (obj->expr_false != NULL) { - char *lab_exit = generate_label(); + char *lab_exit = generate_label("BDLL"); printf(" jmp %s\n", lab_exit); printf("%s:\n", lab_false); compile_object(obj->expr_false); printf("%s:\n", lab_exit); - free(lab_exit); } else { printf("%s:\n", lab_false); } - free(lab_false); +} + +void +compile_string(Object *obj) { + printf(" ;; --> compile_string\n"); + Constant c; + + // Check if the string is already stored as a constant. + ssize_t idx = -1; + for (size_t i = 0; i < array_size(constants); i++) { + c = constants[i]; + if (object_equal(c.obj, obj)) { + idx = i; + break; + } + } + if (idx < 0) { + idx = array_size(constants); + c = (Constant){ + .obj = obj, + .label = generate_label("BDLC"), + }; + array_push(constants, c); + } + + // Create a tagged pointer to the label. + printf(" mov rax, %s\n", c.label); + printf(" or rax, STRING_TAG\n"); + printf(" push rax\n"); + printf(" ;; <-- compile_string\n"); } void @@ -403,14 +436,53 @@ compile_object(Object *obj) { case OBJ_TYPE_FALSE: { compile_boolean(obj); } break; case OBJ_TYPE_FIXNUM: { compile_fixnum(obj); } break; case OBJ_TYPE_PAIR: { compile_proc_call(obj); } break; + case OBJ_TYPE_STRING: { compile_string(obj); } break; case OBJ_TYPE_IF: { compile_if(obj); } break; default: break; } } +void +emit_bss_section(void) { + printf("section .bss\n"); + printf("bdl_heap:\n"); + printf(" resb HEAP_SIZE\n"); + printf("\n"); +} + +void +emit_data_section(void) { + printf("section .data\n"); + printf("true_str: db \"true\", 10, 0, 0, 0\n"); + printf("false_str: db \"false\", 10, 0, 0\n"); + for (size_t i = 0; i < array_size(constants); i++) { + // NOTE: Only supporting string constants for now. + Constant c = constants[i]; + int n = c.obj->text.n; + // TODO: escape characters maybe? + // TODO: quote all strings maybe? + printf("%s:\n", c.label); + printf(" dq %d\n", n + 1); + printf(" db \"%.*s\", 10\n", n, c.obj->text.start); + // Ensure alignment to 8 bytes. + int remainder = (n + 1) % 8; + if (remainder != 0) { + printf(" times %d db 0\n", 8 - (n + 1) % 8); + } + } + printf("\n"); +} + void compile(Root *roots) { + // Prepare compilation variables. + array_init(constants, 0); + array_init(labels, 0); + + // Prelude. printf("%%define NIL_VAL %zu\n", NIL_VAL); + printf("%%define TRUE_VAL %zu\n", TRUE_VAL); + printf("%%define FALSE_VAL %zu\n", FALSE_VAL); printf("%%define BOOL_MASK %zu\n", BOOL_MASK); printf("%%define BOOL_TAG %zu\n", BOOL_TAG); printf("%%define BOOL_SHIFT %zu\n", BOOL_SHIFT); @@ -419,16 +491,30 @@ compile(Root *roots) { printf("%%define FIXNUM_SHIFT %zu\n", FIXNUM_SHIFT); printf("%%define PAIR_MASK %zu\n", PAIR_MASK); printf("%%define PAIR_TAG %zu\n", PAIR_TAG); + printf("%%define STRING_INV_MASK %zu\n", STRING_INV_MASK); printf("%%define STRING_MASK %zu\n", STRING_MASK); printf("%%define STRING_TAG %zu\n", STRING_TAG); printf("%%define HEAP_SIZE %zu\n", HEAP_SIZE); printf("\n"); emit_file(PRELUDE_FILE); + + // Compile program. for (size_t i = 0; i < array_size(roots); i++) { Object *root = roots[i]; compile_object(root); } + + // Postlude. emit_file(POSTLUDE_FILE); + emit_data_section(); + emit_bss_section(); + + // Clean resources. + array_free(constants); + for (size_t i = 0; i < array_size(labels); i++) { + free(labels[i]); + } + array_free(labels); } #endif // BDL_COMPILER_H diff --git a/src/x86_64/postlude.asm b/src/x86_64/postlude.asm index 37f9df1..45be7ee 100644 --- a/src/x86_64/postlude.asm +++ b/src/x86_64/postlude.asm @@ -5,11 +5,8 @@ _start_return: call display exit: - ; exit syscall + ;; exit syscall mov rax, 60 xor rdi, rdi syscall -section .bss -bdl_heap: - resb HEAP_SIZE diff --git a/src/x86_64/prelude.asm b/src/x86_64/prelude.asm index 3ad33f7..c9ef823 100644 --- a/src/x86_64/prelude.asm +++ b/src/x86_64/prelude.asm @@ -71,10 +71,16 @@ bool_write: syscall ret -true_str: - db "true", 10 -false_str: - db "false", 10 +printstring: + mov rsi, rdi + mov rax, STRING_INV_MASK + and rsi, rax + mov rdx, [rsi] + add rsi, 8 + mov rax, 1 + mov rdi, 1 + syscall + ret display: ;; is nil? @@ -91,6 +97,15 @@ display: ret not_bool: + ;; is string? + mov rax, rdi + and rax, STRING_MASK + cmp rax, STRING_TAG + jne not_string + call printstring + ret +not_string: + ;; is fixnum? mov rax, rdi call printdln -- cgit v1.2.1