1 files changed, 261 insertions, 0 deletions
diff --git a/src/compiler.c b/src/compiler.c
new file mode 100644
index 0000000..d1e8a74
--- /dev/null
+++ b/src/compiler.c
@@ -0,0 +1,261 @@
+#ifndef COMPILER_C
+#define COMPILER_C
+#include "badlib.h"
+#include "parser.c"
+typedef struct Instruction {
+    u8 dst;
+    u8 a;
+    u8 b;
+    u8 op;
+} Instruction;
+typedef union Constant {
+    s64 i;
+    u64 u;
+    double f;
+    ptrsize ptr;
+} Constant;
+typedef struct Chunk {
+    Instruction *code;
+    // Constant values that fit in 64 bits.
+    Constant *constants;
+    IntIntMap *intmap;
+    sz const_idx;
+    // Constant strings.
+    Str *strings;
+    StrIntMap *strmap;
+    sz str_idx;
+    // Number of registers currently used in this chunk.
+    sz reg_idx;
+    // Debugging.
+    Str file_name;
+    Arena *storage;
+    // TODO: line/col info for debugging.
+} Chunk;
+typedef enum OpCode {
+    //            OP    DST   A   B
+    // ---------------------------------------------------------------
+    // VM/high level instructions.
+    OP_HALT,   // halt
+    // Load/Store instructions.
+    OP_LD8K,   // ld8k   rx, ca      -> u8  rx = ca
+    OP_LD16K,  // ld16k  rx, ca      -> u16 rx = ca
+    OP_LD32K,  // ld32k  rx, ca      -> u32 rx = ca
+    OP_LD64K,  // ld64k  rx, ca      -> u64 rx = ca
+    OP_LD8I,   // ld8i   rx, ra, cb  -> u8  *p; rx = p[ra + cb]
+    OP_LD16I,  // ld16i  rx, ra, cb  -> u16 *p; rx = p[ra + cb]
+    OP_LD32I,  // ld32i  rx, ra, cb  -> u32 *p; rx = p[ra + cb]
+    OP_LD64I,  // ld64i  rx, ra, cb  -> u64 *p; rx = p[ra + cb]
+    OP_LD8,    // ld8    rx, ra, rb  -> u8  *p; rx = p[ra + rb]
+    OP_LD16,   // ld16   rx, ra, rb  -> u16 *p; rx = p[ra + rb]
+    OP_LD32,   // ld32   rx, ra, rb  -> u32 *p; rx = p[ra + rb]
+    OP_LD64,   // ld64   rx, ra, rb  -> u64 *p; rx = p[ra + rb]
+    OP_ST8I,   // st8i   rx, ra, cb  -> u8  *p; p[ra + cb] = rx
+    OP_ST16I,  // st16i  rx, ra, cb  -> u16 *p; p[ra + cb] = rx
+    OP_ST32I,  // st32i  rx, ra, cb  -> u32 *p; p[ra + cb] = rx
+    OP_ST64I,  // st64i  rx, ra, cb  -> u64 *p; p[ra + cb] = rx
+    OP_ST8,    // st8    rx, ra, rb  -> u8  *p; p[ra + rb] = rx
+    OP_ST16,   // st16   rx, ra, rb  -> u16 *p; p[ra + rb] = rx
+    OP_ST32,   // st32   rx, ra, rb  -> u32 *p; p[ra + rb] = rx
+    OP_ST64,   // st64   rx, ra, rb  -> u64 *p; p[ra + rb] = rx
+    // Integer arithmetic (only int/s64 for now).
+    OP_ADDI,   // addk   rx, ra, cb
+    OP_SUBI,   // subk   rx, ra, cb
+    OP_MULI,   // mulk   rx, ra, cb
+    OP_DIVI,   // divk   rx, ra, cb
+    OP_MODI,   // modk   rx, ra, cb
+    OP_ADD,    // add    rx, ra, rb
+    OP_SUB,    // sub    rx, ra, rb
+    OP_MUL,    // mul    rx, ra, rb
+    OP_DIV,    // div    rx, ra, rb
+    OP_MOD,    // mod    rx, ra, rb
+    // Floating point arithmetic (only f64 for now).
+    OP_ADDFI,  // addfk  rx, ra, cb
+    OP_SUBFI,  // subfk  rx, ra, cb
+    OP_MULFI,  // mulfk  rx, ra, cb
+    OP_DIVFI,  // divfk  rx, ra, cb
+    OP_MODFI,  // modfk  rx, ra, cb
+    OP_ADDF,   // addf   rx, ra, rb
+    OP_SUBF,   // subf   rx, ra, rb
+    OP_MULF,   // mulf   rx, ra, rb
+    OP_DIVF,   // divf   rx, ra, rb
+    OP_MODF,   // modf   rx, ra, rb
+    // Register-to-register copy.
+    OP_MOV8,   // mov8   rx, ra  -> rx = ra & 0xFF
+    OP_MOV16,  // mov16  rx, ra  -> rx = ra & 0xFFFF
+    OP_MOV32,  // mov32  rx, ra  -> rx = ra & 0xFFFFFFFF
+    OP_MOV64,  // mov64  rx, ra  -> rx = ra & 0xFFFFFFFFFFFFFFFF
+} OpCode;
+Str op_str[] = {
+    [OP_HALT] = cstr("HALT    "),
+    // Load ops.
+    [OP_LD8K] = cstr("LD8K    "),
+    [OP_LD16K] = cstr("LD16K   "),
+    [OP_LD32K] = cstr("LD32K   "),
+    [OP_LD64K] = cstr("LD64K   "),
+    [OP_LD8I] = cstr("LD8I    "),
+    [OP_LD16I] = cstr("LD16I   "),
+    [OP_LD32I] = cstr("LD32I   "),
+    [OP_LD64I] = cstr("LD64I   "),
+    [OP_LD8] = cstr("LD8     "),
+    [OP_LD16] = cstr("LD16    "),
+    [OP_LD32] = cstr("LD32    "),
+    [OP_LD64] = cstr("LD64    "),
+    // Store ops.
+    [OP_ST8I] = cstr("ST8I    "),
+    [OP_ST16I] = cstr("ST16I   "),
+    [OP_ST32I] = cstr("ST32I   "),
+    [OP_ST64I] = cstr("ST64I   "),
+    [OP_ST8] = cstr("ST8     "),
+    [OP_ST16] = cstr("ST16    "),
+    [OP_ST32] = cstr("ST32    "),
+    [OP_ST64] = cstr("ST64    "),
+    // Arithmetic.
+    [OP_ADDI] = cstr("ADDI    "),
+    [OP_SUBI] = cstr("SUBI    "),
+    [OP_MULI] = cstr("MULI    "),
+    [OP_DIVI] = cstr("DIVI    "),
+    [OP_MODI] = cstr("MODI    "),
+    [OP_ADD] = cstr("ADD     "),
+    [OP_SUB] = cstr("SUB     "),
+    [OP_MUL] = cstr("MUL     "),
+    [OP_DIV] = cstr("DIV     "),
+    [OP_MOD] = cstr("MOD     "),
+    [OP_ADDFI] = cstr("ADDFI   "),
+    [OP_SUBFI] = cstr("SUBFI   "),
+    [OP_MULFI] = cstr("MULFI   "),
+    [OP_DIVFI] = cstr("DIVFI   "),
+    [OP_MODFI] = cstr("MODFI   "),
+    [OP_ADDF] = cstr("ADDF    "),
+    [OP_SUBF] = cstr("SUBF    "),
+    [OP_MULF] = cstr("MULF    "),
+    [OP_DIVF] = cstr("DIVF    "),
+    // Reg copy/move.
+    [OP_MODF] = cstr("MODF    "),
+    [OP_MOV8] = cstr("MOV8    "),
+    [OP_MOV16] = cstr("MOV16   "),
+    [OP_MOV32] = cstr("MOV32   "),
+    [OP_MOV64] = cstr("MOV64   "),
+};
+typedef enum {
+    COMP_CONST,
+    COMP_STRING,
+    COMP_REG,
+    COMP_ERR,
+} CompResultType;
+typedef struct CompResult {
+    sz idx;
+    CompResultType type;
+} CompResult;
+CompResult compile_expr(Chunk *chunk, Node *node);
+#define EMIT_OP(OP, DST, A, B, NODE, CHUNK)                \
+    do {                                                   \
+        Instruction inst = (Instruction){                  \
+            .op = (OP),                                    \
+            .dst = (DST),                                  \
+            .a = (A),                                      \
+            .b = (B),                                      \
+        };                                                 \
+        array_push((CHUNK)->code, inst, (CHUNK)->storage); \
+    } while (0)
+CompResult
+compile_binary(OpCode op, Chunk *chunk, Node *node) {
+    CompResult comp_a = compile_expr(chunk, node->left);
+    CompResult comp_b = compile_expr(chunk, node->right);
+    sz reg_a;
+    sz reg_b;
+    switch (comp_a.type) {
+        case COMP_CONST: {
+            reg_a = chunk->reg_idx++;
+            EMIT_OP(OP_LD64K, reg_a, comp_a.idx, 0, node, chunk);
+        } break;
+        case COMP_REG: {
+            reg_a = comp_a.idx;
+        } break;
+        default: {
+            return (CompResult){.type = COMP_ERR};
+        } break;
+    }
+    switch (comp_b.type) {
+        case COMP_CONST: {
+            reg_b = chunk->reg_idx++;
+            EMIT_OP(OP_LD64K, reg_b, comp_b.idx, 0, node, chunk);
+        } break;
+        case COMP_REG: {
+            reg_b = comp_b.idx;
+        } break;
+        default: {
+            return (CompResult){.type = COMP_ERR};
+        } break;
+    }
+    sz reg_dst = comp_a.idx;  // Less registers
+    // sz reg_dst = chunk->reg_idx++; // Better for optimization
+    EMIT_OP(op, reg_dst, reg_a, reg_b, node, chunk);
+    return (CompResult){.type = COMP_REG, .idx = reg_dst};
+}
+CompResult
+compile_expr(Chunk *chunk, Node *node) {
+    switch (node->kind) {
+        case NODE_ADD: return compile_binary(OP_ADD, chunk, node); break;
+        case NODE_SUB: return compile_binary(OP_SUB, chunk, node); break;
+        case NODE_MUL: return compile_binary(OP_MUL, chunk, node); break;
+        case NODE_DIV: return compile_binary(OP_DIV, chunk, node); break;
+        case NODE_MOD: return compile_binary(OP_MOD, chunk, node); break;
+        case NODE_TRUE:
+        case NODE_FALSE:
+        case NODE_NUM_FLOAT:
+        case NODE_NUM_INT: {
+            sz value = node->value.i;
+            // Make sure we don't have duplicated constants.
+            IntIntMap *map = intintmap_lookup(&chunk->intmap, value);
+            if (!map) {
+                map = intintmap_insert(&chunk->intmap, value,
+                                       chunk->const_idx++, chunk->storage);
+                Constant c = (Constant){.i = node->value.i};
+                array_push(chunk->constants, c, chunk->storage);
+            }
+            return (CompResult){
+                .type = COMP_CONST,
+                .idx = map->val,
+            };
+        } break;
+        case NODE_STRING: {
+            Str string = node->value.str;
+            // Make sure we don't have duplicated strings.
+            StrIntMap *map = strintmap_lookup(&chunk->strmap, string);
+            if (!map) {
+                map = strintmap_insert(&chunk->strmap, string, chunk->str_idx++,
+                                       chunk->storage);
+                array_push(chunk->strings, string, chunk->storage);
+            }
+            return (CompResult){
+                .type = COMP_STRING,
+                .idx = map->val,
+            };
+        } break;
+        default: {
+            eprintln("error: compilation not implemented for node %s",
+                     node_str[node->kind]);
+            exit(EXIT_FAILURE);
+        } break;
+    }
+    return (CompResult){.type = COMP_ERR};
+}
+#endif  // COMPILER_C

diff --git a/src/compiler.c b/src/compiler.c new file mode 100644 index 0000000..d1e8a74 --- /dev/null +++ b/src/compiler.c
@@ -0,0 +1,261 @@
	1	#ifndef COMPILER_C
	2	#define COMPILER_C
	3
	4	#include "badlib.h"
	5
	6	#include "parser.c"
	7
	8	typedef struct Instruction {
	9	u8 dst;
	10	u8 a;
	11	u8 b;
	12	u8 op;
	13	} Instruction;
	14
	15	typedef union Constant {
	16	s64 i;
	17	u64 u;
	18	double f;
	19	ptrsize ptr;
	20	} Constant;
	21
	22	typedef struct Chunk {
	23	Instruction *code;
	24
	25	// Constant values that fit in 64 bits.
	26	Constant *constants;
	27	IntIntMap *intmap;
	28	sz const_idx;
	29
	30	// Constant strings.
	31	Str *strings;
	32	StrIntMap *strmap;
	33	sz str_idx;
	34
	35	// Number of registers currently used in this chunk.
	36	sz reg_idx;
	37
	38	// Debugging.
	39	Str file_name;
	40	Arena *storage;
	41	// TODO: line/col info for debugging.
	42	} Chunk;
	43
	44	typedef enum OpCode {
	45	// OP DST A B
	46	// ---------------------------------------------------------------
	47	// VM/high level instructions.
	48	OP_HALT, // halt
	49	// Load/Store instructions.
	50	OP_LD8K, // ld8k rx, ca -> u8 rx = ca
	51	OP_LD16K, // ld16k rx, ca -> u16 rx = ca
	52	OP_LD32K, // ld32k rx, ca -> u32 rx = ca
	53	OP_LD64K, // ld64k rx, ca -> u64 rx = ca
	54	OP_LD8I, // ld8i rx, ra, cb -> u8 *p; rx = p[ra + cb]
	55	OP_LD16I, // ld16i rx, ra, cb -> u16 *p; rx = p[ra + cb]
	56	OP_LD32I, // ld32i rx, ra, cb -> u32 *p; rx = p[ra + cb]
	57	OP_LD64I, // ld64i rx, ra, cb -> u64 *p; rx = p[ra + cb]
	58	OP_LD8, // ld8 rx, ra, rb -> u8 *p; rx = p[ra + rb]
	59	OP_LD16, // ld16 rx, ra, rb -> u16 *p; rx = p[ra + rb]
	60	OP_LD32, // ld32 rx, ra, rb -> u32 *p; rx = p[ra + rb]
	61	OP_LD64, // ld64 rx, ra, rb -> u64 *p; rx = p[ra + rb]
	62	OP_ST8I, // st8i rx, ra, cb -> u8 *p; p[ra + cb] = rx
	63	OP_ST16I, // st16i rx, ra, cb -> u16 *p; p[ra + cb] = rx
	64	OP_ST32I, // st32i rx, ra, cb -> u32 *p; p[ra + cb] = rx
	65	OP_ST64I, // st64i rx, ra, cb -> u64 *p; p[ra + cb] = rx
	66	OP_ST8, // st8 rx, ra, rb -> u8 *p; p[ra + rb] = rx
	67	OP_ST16, // st16 rx, ra, rb -> u16 *p; p[ra + rb] = rx
	68	OP_ST32, // st32 rx, ra, rb -> u32 *p; p[ra + rb] = rx
	69	OP_ST64, // st64 rx, ra, rb -> u64 *p; p[ra + rb] = rx
	70	// Integer arithmetic (only int/s64 for now).
	71	OP_ADDI, // addk rx, ra, cb
	72	OP_SUBI, // subk rx, ra, cb
	73	OP_MULI, // mulk rx, ra, cb
	74	OP_DIVI, // divk rx, ra, cb
	75	OP_MODI, // modk rx, ra, cb
	76	OP_ADD, // add rx, ra, rb
	77	OP_SUB, // sub rx, ra, rb
	78	OP_MUL, // mul rx, ra, rb
	79	OP_DIV, // div rx, ra, rb
	80	OP_MOD, // mod rx, ra, rb
	81	// Floating point arithmetic (only f64 for now).
	82	OP_ADDFI, // addfk rx, ra, cb
	83	OP_SUBFI, // subfk rx, ra, cb
	84	OP_MULFI, // mulfk rx, ra, cb
	85	OP_DIVFI, // divfk rx, ra, cb
	86	OP_MODFI, // modfk rx, ra, cb
	87	OP_ADDF, // addf rx, ra, rb
	88	OP_SUBF, // subf rx, ra, rb
	89	OP_MULF, // mulf rx, ra, rb
	90	OP_DIVF, // divf rx, ra, rb
	91	OP_MODF, // modf rx, ra, rb
	92	// Register-to-register copy.
	93	OP_MOV8, // mov8 rx, ra -> rx = ra & 0xFF
	94	OP_MOV16, // mov16 rx, ra -> rx = ra & 0xFFFF
	95	OP_MOV32, // mov32 rx, ra -> rx = ra & 0xFFFFFFFF
	96	OP_MOV64, // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF
	97	} OpCode;
	98
	99	Str op_str[] = {
	100	[OP_HALT] = cstr("HALT "),
	101	// Load ops.
	102	[OP_LD8K] = cstr("LD8K "),
	103	[OP_LD16K] = cstr("LD16K "),
	104	[OP_LD32K] = cstr("LD32K "),
	105	[OP_LD64K] = cstr("LD64K "),
	106	[OP_LD8I] = cstr("LD8I "),
	107	[OP_LD16I] = cstr("LD16I "),
	108	[OP_LD32I] = cstr("LD32I "),
	109	[OP_LD64I] = cstr("LD64I "),
	110	[OP_LD8] = cstr("LD8 "),
	111	[OP_LD16] = cstr("LD16 "),
	112	[OP_LD32] = cstr("LD32 "),
	113	[OP_LD64] = cstr("LD64 "),
	114	// Store ops.
	115	[OP_ST8I] = cstr("ST8I "),
	116	[OP_ST16I] = cstr("ST16I "),
	117	[OP_ST32I] = cstr("ST32I "),
	118	[OP_ST64I] = cstr("ST64I "),
	119	[OP_ST8] = cstr("ST8 "),
	120	[OP_ST16] = cstr("ST16 "),
	121	[OP_ST32] = cstr("ST32 "),
	122	[OP_ST64] = cstr("ST64 "),
	123	// Arithmetic.
	124	[OP_ADDI] = cstr("ADDI "),
	125	[OP_SUBI] = cstr("SUBI "),
	126	[OP_MULI] = cstr("MULI "),
	127	[OP_DIVI] = cstr("DIVI "),
	128	[OP_MODI] = cstr("MODI "),
	129	[OP_ADD] = cstr("ADD "),
	130	[OP_SUB] = cstr("SUB "),
	131	[OP_MUL] = cstr("MUL "),
	132	[OP_DIV] = cstr("DIV "),
	133	[OP_MOD] = cstr("MOD "),
	134	[OP_ADDFI] = cstr("ADDFI "),
	135	[OP_SUBFI] = cstr("SUBFI "),
	136	[OP_MULFI] = cstr("MULFI "),
	137	[OP_DIVFI] = cstr("DIVFI "),
	138	[OP_MODFI] = cstr("MODFI "),
	139	[OP_ADDF] = cstr("ADDF "),
	140	[OP_SUBF] = cstr("SUBF "),
	141	[OP_MULF] = cstr("MULF "),
	142	[OP_DIVF] = cstr("DIVF "),
	143	// Reg copy/move.
	144	[OP_MODF] = cstr("MODF "),
	145	[OP_MOV8] = cstr("MOV8 "),
	146	[OP_MOV16] = cstr("MOV16 "),
	147	[OP_MOV32] = cstr("MOV32 "),
	148	[OP_MOV64] = cstr("MOV64 "),
	149	};
	150
	151	typedef enum {
	152	COMP_CONST,
	153	COMP_STRING,
	154	COMP_REG,
	155	COMP_ERR,
	156	} CompResultType;
	157
	158	typedef struct CompResult {
	159	sz idx;
	160	CompResultType type;
	161	} CompResult;
	162
	163	CompResult compile_expr(Chunk chunk, Node node);
	164
	165	#define EMIT_OP(OP, DST, A, B, NODE, CHUNK) \
	166	do { \
	167	Instruction inst = (Instruction){ \
	168	.op = (OP), \
	169	.dst = (DST), \
	170	.a = (A), \
	171	.b = (B), \
	172	}; \
	173	array_push((CHUNK)->code, inst, (CHUNK)->storage); \
	174	} while (0)
	175
	176	CompResult
	177	compile_binary(OpCode op, Chunk chunk, Node node) {
	178	CompResult comp_a = compile_expr(chunk, node->left);
	179	CompResult comp_b = compile_expr(chunk, node->right);
	180	sz reg_a;
	181	sz reg_b;
	182	switch (comp_a.type) {
	183	case COMP_CONST: {
	184	reg_a = chunk->reg_idx++;
	185	EMIT_OP(OP_LD64K, reg_a, comp_a.idx, 0, node, chunk);
	186	} break;
	187	case COMP_REG: {
	188	reg_a = comp_a.idx;
	189	} break;
	190	default: {
	191	return (CompResult){.type = COMP_ERR};
	192	} break;
	193	}
	194	switch (comp_b.type) {
	195	case COMP_CONST: {
	196	reg_b = chunk->reg_idx++;
	197	EMIT_OP(OP_LD64K, reg_b, comp_b.idx, 0, node, chunk);
	198	} break;
	199	case COMP_REG: {
	200	reg_b = comp_b.idx;
	201	} break;
	202	default: {
	203	return (CompResult){.type = COMP_ERR};
	204	} break;
	205	}
	206	sz reg_dst = comp_a.idx; // Less registers
	207	// sz reg_dst = chunk->reg_idx++; // Better for optimization
	208	EMIT_OP(op, reg_dst, reg_a, reg_b, node, chunk);
	209	return (CompResult){.type = COMP_REG, .idx = reg_dst};
	210	}
	211
	212	CompResult
	213	compile_expr(Chunk chunk, Node node) {
	214	switch (node->kind) {
	215	case NODE_ADD: return compile_binary(OP_ADD, chunk, node); break;
	216	case NODE_SUB: return compile_binary(OP_SUB, chunk, node); break;
	217	case NODE_MUL: return compile_binary(OP_MUL, chunk, node); break;
	218	case NODE_DIV: return compile_binary(OP_DIV, chunk, node); break;
	219	case NODE_MOD: return compile_binary(OP_MOD, chunk, node); break;
	220	case NODE_TRUE:
	221	case NODE_FALSE:
	222	case NODE_NUM_FLOAT:
	223	case NODE_NUM_INT: {
	224	sz value = node->value.i;
	225	// Make sure we don't have duplicated constants.
	226	IntIntMap *map = intintmap_lookup(&chunk->intmap, value);
	227	if (!map) {
	228	map = intintmap_insert(&chunk->intmap, value,
	229	chunk->const_idx++, chunk->storage);
	230	Constant c = (Constant){.i = node->value.i};
	231	array_push(chunk->constants, c, chunk->storage);
	232	}
	233	return (CompResult){
	234	.type = COMP_CONST,
	235	.idx = map->val,
	236	};
	237	} break;
	238	case NODE_STRING: {
	239	Str string = node->value.str;
	240	// Make sure we don't have duplicated strings.
	241	StrIntMap *map = strintmap_lookup(&chunk->strmap, string);
	242	if (!map) {
	243	map = strintmap_insert(&chunk->strmap, string, chunk->str_idx++,
	244	chunk->storage);
	245	array_push(chunk->strings, string, chunk->storage);
	246	}
	247	return (CompResult){
	248	.type = COMP_STRING,
	249	.idx = map->val,
	250	};
	251	} break;
	252	default: {
	253	eprintln("error: compilation not implemented for node %s",
	254	node_str[node->kind]);
	255	exit(EXIT_FAILURE);
	256	} break;
	257	}
	258	return (CompResult){.type = COMP_ERR};
	259	}
	260
	261	#endif // COMPILER_C