aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/badlib.h31
-rw-r--r--src/compiler.c1713
-rw-r--r--src/lexer.c38
-rw-r--r--src/main.c185
-rw-r--r--src/parser.c462
-rw-r--r--src/semantic.c2598
-rw-r--r--src/vm.c506
7 files changed, 3302 insertions, 2231 deletions
diff --git a/src/badlib.h b/src/badlib.h
index c73b57f..7a78e3b 100644
--- a/src/badlib.h
+++ b/src/badlib.h
@@ -79,6 +79,22 @@ typedef size_t usize;
79 (MACRO_VAR(_i_) += 1), end) 79 (MACRO_VAR(_i_) += 1), end)
80 80
81// 81//
82// Utility functions.
83//
84
85sz
86next_power_of_2(u32 x) {
87 x--;
88 x |= x >> 1;
89 x |= x >> 2;
90 x |= x >> 4;
91 x |= x >> 8;
92 x |= x >> 16;
93 x++;
94 return x;
95}
96
97//
82// Allocators and Arenas. 98// Allocators and Arenas.
83// 99//
84 100
@@ -131,6 +147,9 @@ arena_free(void *ptr, sz size, void *ctx) {
131 147
132void * 148void *
133arena_realloc(void *ptr, sz old_size, sz new_size, void *ctx) { 149arena_realloc(void *ptr, sz old_size, sz new_size, void *ctx) {
150 if (!ptr) {
151 return arena_malloc(new_size, ctx);
152 }
134 // This function can avoid copying memory around if we could just extend the 153 // This function can avoid copying memory around if we could just extend the
135 // latest allocation, otherwise a new malloc will be performed (keeping the 154 // latest allocation, otherwise a new malloc will be performed (keeping the
136 // previous data alive!). 155 // previous data alive!).
@@ -189,7 +208,7 @@ typedef struct SearchResult {
189 208
190bool 209bool
191array_eq(Array a, Array b) { 210array_eq(Array a, Array b) {
192 return a.size == b.size && !memcmp(a.mem, b.mem, a.size); 211 return a.size == b.size && a.mem && b.mem && !memcmp(a.mem, b.mem, a.size);
193} 212}
194 213
195SearchResult 214SearchResult
@@ -306,6 +325,12 @@ str_eq(Str a, Str b) {
306 return array_eq(a, b); 325 return array_eq(a, b);
307} 326}
308 327
328int
329str_cmp(Str a, Str b) {
330 return strncmp((const char *)a.mem, (const char *)b.mem,
331 MIN(a.size, b.size));
332}
333
309Str 334Str
310str_split(Str *a, Str split) { 335str_split(Str *a, Str split) {
311 assert(a != NULL); 336 assert(a != NULL);
@@ -1002,6 +1027,10 @@ typedef struct ArrayHeader {
1002// Free the memory from the original allocated position. 1027// Free the memory from the original allocated position.
1003#define array_free(ARR) ((ARR) ? free(array_head(ARR)), (ARR) = NULL : 0) 1028#define array_free(ARR) ((ARR) ? free(array_head(ARR)), (ARR) = NULL : 0)
1004 1029
// Sort an array in place using qsort. FN is a qsort-style comparison function
// receiving pointers to two elements. A NULL array is left untouched.
//
// The body is wrapped in do/while(0) so the macro behaves as one statement and
// can be used safely inside unbraced if/else; invoke it with a trailing
// semicolon like a regular function call.
#define array_sort(ARR, FN) \
    do { \
        if (ARR) { \
            qsort((ARR), array_size((ARR)), sizeof((ARR)[0]), (FN)); \
        } \
    } while (0)
1033
1005static inline void * 1034static inline void *
1006_array_reserve(sz num_elem, sz type_size, Arena *a) { 1035_array_reserve(sz num_elem, sz type_size, Arena *a) {
1007 u8 *p = arena_malloc(num_elem * type_size + sizeof(ArrayHeader), a); 1036 u8 *p = arena_malloc(num_elem * type_size + sizeof(ArrayHeader), a);
diff --git a/src/compiler.c b/src/compiler.c
index 11720d5..f26a439 100644
--- a/src/compiler.c
+++ b/src/compiler.c
@@ -4,22 +4,13 @@
4#include "badlib.h" 4#include "badlib.h"
5 5
6#include "parser.c" 6#include "parser.c"
7 7#include "semantic.c"
8typedef struct Variable {
9 Str name;
10 Str type;
11 sz size;
12 sz offset;
13 sz idx;
14} Variable;
15
16MAPDEF(StrVarMap, varmap, Str, Variable, str_hash, str_eq)
17 8
18typedef struct Instruction { 9typedef struct Instruction {
19 u8 dst; 10 u16 dst;
20 u8 a; 11 u16 a;
21 u8 b; 12 u16 b;
22 u8 op; 13 u16 op;
23} Instruction; 14} Instruction;
24 15
25typedef union Constant { 16typedef union Constant {
@@ -34,19 +25,9 @@ typedef struct LineCol {
34 sz col; 25 sz col;
35} LineCol; 26} LineCol;
36 27
37typedef struct Function {
38 Str name;
39 sz param_arity;
40 sz return_arity;
41 sz index;
42} Function;
43
44MAPDEF(FunctionMap, funcmap, Str, Function, str_hash, str_eq)
45
46typedef struct Chunk { 28typedef struct Chunk {
47 sz id; 29 sz id;
48 Str name; 30 Str name;
49 struct Chunk *parent;
50 31
51 Instruction *code; 32 Instruction *code;
52 IntIntMap *labels; // label -> chunk_index 33 IntIntMap *labels; // label -> chunk_index
@@ -62,19 +43,11 @@ typedef struct Chunk {
62 StrIntMap *strmap; 43 StrIntMap *strmap;
63 sz str_idx; 44 sz str_idx;
64 45
65 // Global/Local variables.
66 Variable *vars;
67 StrVarMap *varmap;
68 sz var_off;
69 sz param_off;
70
71 // Number of registers currently used in this chunk. 46 // Number of registers currently used in this chunk.
72 sz reg_idx; 47 sz reg_idx;
73 48
74 // Number of functions currently used in this chunk. 49 // Associated function metadata.
75 struct Chunk **functions; 50 Function fun;
76 FunctionMap *funmap;
77 sz fun_idx;
78 51
79 // Debugging. 52 // Debugging.
80 Str file_name; 53 Str file_name;
@@ -83,17 +56,25 @@ typedef struct Chunk {
83} Chunk; 56} Chunk;
84 57
85typedef struct Compiler { 58typedef struct Compiler {
86 Chunk main_chunk;
87 Str file_name; 59 Str file_name;
88 Arena *storage; 60 Arena *storage;
89 61
90 // Tables. 62 // Quick search tables for base types.
91 StrSet *integer_types;
92 StrSet *numeric_types; 63 StrSet *numeric_types;
64 StrSet *integer_types;
65 StrSet *uint_types;
66 StrSet *sint_types;
67 StrSet *float_types;
68 SymbolMap *symbols;
69
70 // All the compiled chunks for this file.
71 struct Chunk **chunks;
93 72
94 // Destinations. 73 // Destinations.
95 sz lab_pre; 74 sz lab_pre;
96 sz lab_post; 75 sz lab_post;
76 sz reg_addr;
77 bool has_addr;
97} Compiler; 78} Compiler;
98 79
99typedef enum OpCode { 80typedef enum OpCode {
@@ -102,19 +83,24 @@ typedef enum OpCode {
102 // VM/high level instructions. 83 // VM/high level instructions.
103 OP_HALT, // halt 84 OP_HALT, // halt
104 // NOTE: LDGVAR/STGVAR* could be obtained in terms of LDGADDR. 85 // NOTE: LDGVAR/STGVAR* could be obtained in terms of LDGADDR.
105 OP_STGVARI, // stgvari vx, ca 86 OP_STGVARI, // stgvari vx, ca
106 OP_STGVAR, // stgvar vx, ra 87 OP_STGVAR, // stgvar vx, ra
107 OP_LDGVAR, // ldgvar rx, va 88 OP_LDGVAR, // ldgvar rx, va
108 OP_LDGADDR, // ldgaddr rx, va 89 OP_LDGADDR, // ldgaddr rx, va
109 OP_STLVARI, // stlvari vx, ca 90 OP_STLVARI, // stlvari vx, ca
110 OP_STLVAR, // stlvar vx, ra 91 OP_STLVAR, // stlvar vx, ra
111 OP_LDLVAR, // ldlvar rx, va 92 OP_LDLVAR, // ldlvar rx, va
112 OP_LDLADDR, // ldladdr rx, va 93 OP_LDLADDR, // ldladdr rx, va
94 OP_LDFUNPTR, // ldfunptr rx, fa ; Load the address of function fa into rx.
113 OP_LDSTR, // ldstr rx, sa ; Stores the address of the string sa into rx 95 OP_LDSTR, // ldstr rx, sa ; Stores the address of the string sa into rx
96 OP_MEMCPY, // memcpy rx, ra, rb ; memcpy(dst = rx, src = ra, rb = nbytes)
97 OP_MEMCPYI, // memcpyi rx, ra, cb
114 // Functions. 98 // Functions.
115 OP_CALL, // call fx ; Bumps the stack pointer by cx 99 OP_CALL, // call fx ; Call the function fx
116 OP_RET, // ret ; Returns from current function 100 OP_RECUR, // recur fx ; Jump to fx without changing the stack.
117 OP_RESERVE, // reserve cx ; Increments the stack pointer by cx bytes 101 OP_RECUR_SELF, // recur ; Jump to the beginning of the current fx.
102 OP_RET, // ret ; Returns from current function
103 OP_RESERVE, // reserve cx ; Increments the stack pointer by cx bytes
118 OP_POP, // pop rx ; Pops the last value of the stack into rx. 104 OP_POP, // pop rx ; Pops the last value of the stack into rx.
119 OP_PUSH, // push rx ; Push the rx value to the stack. 105 OP_PUSH, // push rx ; Push the rx value to the stack.
120 OP_PUSHI, // pushi cx ; Push the cx value to the stack. 106 OP_PUSHI, // pushi cx ; Push the cx value to the stack.
@@ -123,69 +109,94 @@ typedef enum OpCode {
123 // Printing values with builtin print/println functions. 109 // Printing values with builtin print/println functions.
124 OP_PRINTSTR, // p rx 110 OP_PRINTSTR, // p rx
125 OP_PRINTSTRI, // p cx 111 OP_PRINTSTRI, // p cx
112 OP_PRINTS8, // p rx
113 OP_PRINTS8I, // p cx
114 OP_PRINTS16, // p rx
115 OP_PRINTS16I, // p cx
116 OP_PRINTS32, // p rx
117 OP_PRINTS32I, // p cx
126 OP_PRINTS64, // p rx 118 OP_PRINTS64, // p rx
127 OP_PRINTS64I, // p cx 119 OP_PRINTS64I, // p cx
120 OP_PRINTU8, // p rx
121 OP_PRINTU8I, // p cx
122 OP_PRINTU16, // p rx
123 OP_PRINTU16I, // p cx
124 OP_PRINTU32, // p rx
125 OP_PRINTU32I, // p cx
126 OP_PRINTU64, // p rx
127 OP_PRINTU64I, // p cx
128 OP_PRINTF64, // p rx 128 OP_PRINTF64, // p rx
129 OP_PRINTF64I, // p cx 129 OP_PRINTF64I, // p cx
130 OP_PRINTF32, // p rx
131 OP_PRINTF32I, // p cx
130 OP_PRINTBOOL, // p rx 132 OP_PRINTBOOL, // p rx
131 OP_PRINTBOOLI, // p cx 133 OP_PRINTBOOLI, // p cx
132 // Load/Store instructions. 134 // Load/Store instructions.
133 OP_LD8K, // ld8k rx, ca -> u8 rx = ca 135 OP_LDCONST, // ldconst rx, ca -> u64 rx = ca
134 OP_LD16K, // ld16k rx, ca -> u16 rx = ca 136 OP_LD8K, // ld8k rx, ra -> u8 *p = ra; rx = *p
135 OP_LD32K, // ld32k rx, ca -> u32 rx = ca 137 OP_LD16K, // ld16k rx, ra -> u16 *p = ra; rx = *p
136 OP_LD64K, // ld64k rx, ca -> u64 rx = ca 138 OP_LD32K, // ld32k rx, ra -> u32 *p = ra; rx = *p
137 OP_LD8I, // ld8i rx, ra, cb -> u8 *p = ra; rx = p[cb] 139 OP_LD64K, // ld64k rx, ra -> u64 *p = ra; rx = *p
138 OP_LD16I, // ld16i rx, ra, cb -> u16 *p = ra; rx = p[cb] 140 OP_ST8K, // ld8k rx, ra -> u8 *p = ra; *p = rx
139 OP_LD32I, // ld32i rx, ra, cb -> u32 *p = ra; rx = p[cb] 141 OP_ST16K, // ld16k rx, ra -> u16 *p = ra; *p = rx
140 OP_LD64I, // ld64i rx, ra, cb -> u64 *p = ra; rx = p[cb] 142 OP_ST32K, // ld32k rx, ra -> u32 *p = ra; *p = rx
141 OP_LD8, // ld8 rx, ra, rb -> u8 *p = ra; rx = p[rb] 143 OP_ST64K, // ld64k rx, ra -> u64 *p = ra; *p = rx
142 OP_LD16, // ld16 rx, ra, rb -> u16 *p = ra; rx = p[rb] 144 OP_LD8I, // ld8i rx, ra, cb -> u8 *p = ra; rx = p[cb]
143 OP_LD32, // ld32 rx, ra, rb -> u32 *p = ra; rx = p[rb] 145 OP_LD16I, // ld16i rx, ra, cb -> u16 *p = ra; rx = p[cb]
144 OP_LD64, // ld64 rx, ra, rb -> u64 *p = ra; rx = p[rb] 146 OP_LD32I, // ld32i rx, ra, cb -> u32 *p = ra; rx = p[cb]
145 OP_ST8I, // st8i rx, ra, cb -> u8 *p = ra; p[cb] = rx 147 OP_LD64I, // ld64i rx, ra, cb -> u64 *p = ra; rx = p[cb]
146 OP_ST16I, // st16i rx, ra, cb -> u16 *p = ra; p[cb] = rx 148 OP_LD8, // ld8 rx, ra, rb -> u8 *p = ra; rx = p[rb]
147 OP_ST32I, // st32i rx, ra, cb -> u32 *p = ra; p[cb] = rx 149 OP_LD16, // ld16 rx, ra, rb -> u16 *p = ra; rx = p[rb]
148 OP_ST64I, // st64i rx, ra, cb -> u64 *p = ra; p[cb] = rx 150 OP_LD32, // ld32 rx, ra, rb -> u32 *p = ra; rx = p[rb]
149 OP_ST8, // st8 rx, ra, rb -> u8 *p = ra; p[rb] = rx 151 OP_LD64, // ld64 rx, ra, rb -> u64 *p = ra; rx = p[rb]
150 OP_ST16, // st16 rx, ra, rb -> u16 *p = ra; p[rb] = rx 152 OP_ST8I, // st8i rx, ra, cb -> u8 *p = ra; p[cb] = rx
151 OP_ST32, // st32 rx, ra, rb -> u32 *p = ra; p[rb] = rx 153 OP_ST16I, // st16i rx, ra, cb -> u16 *p = ra; p[cb] = rx
152 OP_ST64, // st64 rx, ra, rb -> u64 *p = ra; p[rb] = rx 154 OP_ST32I, // st32i rx, ra, cb -> u32 *p = ra; p[cb] = rx
155 OP_ST64I, // st64i rx, ra, cb -> u64 *p = ra; p[cb] = rx
156 OP_ST8, // st8 rx, ra, rb -> u8 *p = ra; p[rb] = rx
157 OP_ST16, // st16 rx, ra, rb -> u16 *p = ra; p[rb] = rx
158 OP_ST32, // st32 rx, ra, rb -> u32 *p = ra; p[rb] = rx
159 OP_ST64, // st64 rx, ra, rb -> u64 *p = ra; p[rb] = rx
153 // Integer arithmetic (only int/s64 for now). 160 // Integer arithmetic (only int/s64 for now).
154 OP_ADDI, // addk rx, ra, cb 161 OP_ADDI, // addi rx, ra, cb
155 OP_SUBI, // subk rx, ra, cb 162 OP_SUBI, // subi rx, ra, cb
156 OP_MULI, // mulk rx, ra, cb 163 OP_MULI, // muli rx, ra, cb
157 OP_DIVI, // divk rx, ra, cb 164 OP_DIVI, // divi rx, ra, cb
158 OP_MODI, // modk rx, ra, cb 165 OP_MODI, // modi rx, ra, cb
159 OP_ADD, // add rx, ra, rb 166 OP_ADD, // add rx, ra, rb
160 OP_SUB, // sub rx, ra, rb 167 OP_SUB, // sub rx, ra, rb
161 OP_MUL, // mul rx, ra, rb 168 OP_MUL, // mul rx, ra, rb
162 OP_DIV, // div rx, ra, rb 169 OP_DIV, // div rx, ra, rb
163 OP_MOD, // mod rx, ra, rb 170 OP_MOD, // mod rx, ra, rb
164 // Floating point arithmetic (only f64 for now). 171 // Floating point arithmetic (only f64 for now).
165 OP_ADDFI, // addfk rx, ra, cb 172 OP_ADDFI, // addfi rx, ra, cb
166 OP_SUBFI, // subfk rx, ra, cb 173 OP_SUBFI, // subfi rx, ra, cb
167 OP_MULFI, // mulfk rx, ra, cb 174 OP_MULFI, // mulfi rx, ra, cb
168 OP_DIVFI, // divfk rx, ra, cb 175 OP_DIVFI, // divfi rx, ra, cb
169 OP_MODFI, // modfk rx, ra, cb 176 OP_MODFI, // modfi rx, ra, cb
170 OP_ADDF, // addf rx, ra, rb 177 OP_ADDF, // addf rx, ra, rb
171 OP_SUBF, // subf rx, ra, rb 178 OP_SUBF, // subf rx, ra, rb
172 OP_MULF, // mulf rx, ra, rb 179 OP_MULF, // mulf rx, ra, rb
173 OP_DIVF, // divf rx, ra, rb 180 OP_DIVF, // divf rx, ra, rb
174 OP_MODF, // modf rx, ra, rb 181 OP_MODF, // modf rx, ra, rb
175 // Register-to-register copy. 182 // Register-to-register copy.
176 OP_MOV8, // mov8 rx, ra -> rx = ra & 0xFF 183 OP_MOV8, // mov8 rx, ra -> rx = ra & 0xFF
177 OP_MOV16, // mov16 rx, ra -> rx = ra & 0xFFFF 184 OP_MOV16, // mov16 rx, ra -> rx = ra & 0xFFFF
178 OP_MOV32, // mov32 rx, ra -> rx = ra & 0xFFFFFFFF 185 OP_MOV32, // mov32 rx, ra -> rx = ra & 0xFFFFFFFF
179 OP_MOV64, // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF 186 OP_MOV64, // mov64 rx, ra -> rx = ra & 0xFFFFFFFFFFFFFFFF
187 OP_MOV8I, // mov8i rx, ca -> rx = ca & 0xFF
188 OP_MOV16I, // mov16i rx, ca -> rx = ca & 0xFFFF
189 OP_MOV32I, // mov32i rx, ca -> rx = ca & 0xFFFFFFFF
190 OP_MOV64I, // mov64i rx, ca -> rx = ca & 0xFFFFFFFFFFFFFFFF
180 // Logic operations (only 64 bits for now). 191 // Logic operations (only 64 bits for now).
181 OP_EQI, // eqk rx, ra, cb 192 OP_EQI, // eqi rx, ra, cb
182 OP_NEQI, // neqk rx, ra, cb 193 OP_NEQI, // neqi rx, ra, cb
183 OP_LTI, // ltk rx, ra, cb 194 OP_LTI, // lti rx, ra, cb
184 OP_GTI, // gtk rx, ra, cb 195 OP_GTI, // gti rx, ra, cb
185 OP_LEI, // lek rx, ra, cb 196 OP_LEI, // lei rx, ra, cb
186 OP_GEI, // gek rx, ra, cb 197 OP_GEI, // gei rx, ra, cb
187 OP_ANDI, // andk rx, ra, cb 198 OP_ANDI, // andi rx, ra, cb
188 OP_ORI, // ork rx, ra, cb 199 OP_ORI, // ori rx, ra, cb
189 OP_NOTI, // noti rx, ra 200 OP_NOTI, // noti rx, ra
190 OP_EQ, // eq rx, ra, rb 201 OP_EQ, // eq rx, ra, rb
191 OP_NEQ, // neq rx, ra, rb 202 OP_NEQ, // neq rx, ra, rb
@@ -215,123 +226,154 @@ typedef enum OpCode {
215 OP_JMPT, // jmpt lx, rx ; jmp to label lx if rx is true 226 OP_JMPT, // jmpt lx, rx ; jmp to label lx if rx is true
216 OP_JMPFI, // jmpf lx, cx ; jmp to label lx if rx is false 227 OP_JMPFI, // jmpf lx, cx ; jmp to label lx if rx is false
217 OP_JMPTI, // jmpt lx, cx ; jmp to label lx if rx is true 228 OP_JMPTI, // jmpt lx, cx ; jmp to label lx if rx is true
229 _OP_NUM,
218} OpCode; 230} OpCode;
219 231
220Str op_str[] = { 232Str op_str[] = {
221 // High level ops. 233 // High level ops.
222 [OP_HALT] = cstr("HALT "), 234 [OP_HALT] = cstr("HALT"),
223 [OP_STGVAR] = cstr("STGVAR "), 235 [OP_STGVAR] = cstr("STGVAR"),
224 [OP_STGVARI] = cstr("STGVARI "), 236 [OP_STGVARI] = cstr("STGVARI"),
225 [OP_LDGVAR] = cstr("LDGVAR "), 237 [OP_LDGVAR] = cstr("LDGVAR"),
226 [OP_LDGADDR] = cstr("LDGADDR "), 238 [OP_LDGADDR] = cstr("LDGADDR"),
227 [OP_STLVAR] = cstr("STLVAR "), 239 [OP_STLVAR] = cstr("STLVAR"),
228 [OP_STLVARI] = cstr("STLVARI "), 240 [OP_STLVARI] = cstr("STLVARI"),
229 [OP_LDLVAR] = cstr("LDLVAR "), 241 [OP_LDLVAR] = cstr("LDLVAR"),
230 [OP_LDLADDR] = cstr("LDLADDR "), 242 [OP_LDLADDR] = cstr("LDLADDR"),
231 [OP_LDSTR] = cstr("LDSTR "), 243 [OP_LDFUNPTR] = cstr("LDFUNPTR"),
232 [OP_PRINTSTR] = cstr("PRNSTR "), 244 [OP_LDSTR] = cstr("LDSTR"),
233 [OP_PRINTSTRI] = cstr("PRNSTRI "), 245 [OP_PRINTSTR] = cstr("PRNSTR"),
234 [OP_PRINTS64] = cstr("PRNS64 "), 246 [OP_PRINTSTRI] = cstr("PRNSTRI"),
235 [OP_PRINTS64I] = cstr("PRNS64I "), 247 [OP_PRINTS8] = cstr("PRNS8"),
236 [OP_PRINTF64] = cstr("PRNF64 "), 248 [OP_PRINTS8I] = cstr("PRNS8I"),
237 [OP_PRINTF64I] = cstr("PRNF64I "), 249 [OP_PRINTS16] = cstr("PRNS16"),
238 [OP_PRINTBOOL] = cstr("PRNBOOL "), 250 [OP_PRINTS16I] = cstr("PRNS16I"),
251 [OP_PRINTS32] = cstr("PRNS32"),
252 [OP_PRINTS32I] = cstr("PRNS32I"),
253 [OP_PRINTS64] = cstr("PRNS64"),
254 [OP_PRINTS64I] = cstr("PRNS64I"),
255 [OP_PRINTU8] = cstr("PRNU8"),
256 [OP_PRINTU8I] = cstr("PRNU8I"),
257 [OP_PRINTU16] = cstr("PRNU16"),
258 [OP_PRINTU16I] = cstr("PRNU16I"),
259 [OP_PRINTU32] = cstr("PRNU32"),
260 [OP_PRINTU32I] = cstr("PRNU32I"),
261 [OP_PRINTU64] = cstr("PRNU64"),
262 [OP_PRINTU64I] = cstr("PRNU64I"),
263 [OP_PRINTF32] = cstr("PRNF32"),
264 [OP_PRINTF32I] = cstr("PRNF32I"),
265 [OP_PRINTF64] = cstr("PRNF64"),
266 [OP_PRINTF64I] = cstr("PRNF64I"),
267 [OP_PRINTBOOL] = cstr("PRNBOOL"),
239 [OP_PRINTBOOLI] = cstr("PRNBOOLI"), 268 [OP_PRINTBOOLI] = cstr("PRNBOOLI"),
240 [OP_PUTRET] = cstr("PUTRET "), 269 [OP_PUTRET] = cstr("PUTRET"),
241 [OP_PUTRETI] = cstr("PUTRETI "), 270 [OP_PUTRETI] = cstr("PUTRETI"),
271 [OP_MEMCPY] = cstr("MEMCPY"),
272 [OP_MEMCPYI] = cstr("MEMCPYI"),
242 // Functions. 273 // Functions.
243 [OP_CALL] = cstr("CALL "), 274 [OP_CALL] = cstr("CALL"),
244 [OP_RET] = cstr("RET "), 275 [OP_RECUR] = cstr("RECUR"),
245 [OP_RESERVE] = cstr("RESERVE "), 276 [OP_RECUR_SELF] = cstr("RECURSLF"),
246 [OP_POP] = cstr("POP "), 277 [OP_RET] = cstr("RET"),
247 [OP_PUSH] = cstr("PUSH "), 278 [OP_RESERVE] = cstr("RESERVE"),
248 [OP_PUSHI] = cstr("PUSHI "), 279 [OP_POP] = cstr("POP"),
280 [OP_PUSH] = cstr("PUSH"),
281 [OP_PUSHI] = cstr("PUSHI"),
249 // Load ops. 282 // Load ops.
250 [OP_LD8K] = cstr("LD8K "), 283 [OP_LDCONST] = cstr("LDCONST"),
251 [OP_LD16K] = cstr("LD16K "), 284 [OP_LD8K] = cstr("LD8K"),
252 [OP_LD32K] = cstr("LD32K "), 285 [OP_LD16K] = cstr("LD16K"),
253 [OP_LD64K] = cstr("LD64K "), 286 [OP_LD32K] = cstr("LD32K"),
254 [OP_LD8I] = cstr("LD8I "), 287 [OP_LD64K] = cstr("LD64K"),
255 [OP_LD16I] = cstr("LD16I "), 288 [OP_ST8K] = cstr("ST8K"),
256 [OP_LD32I] = cstr("LD32I "), 289 [OP_ST16K] = cstr("ST16K"),
257 [OP_LD64I] = cstr("LD64I "), 290 [OP_ST32K] = cstr("ST32K"),
258 [OP_LD8] = cstr("LD8 "), 291 [OP_ST64K] = cstr("ST64K"),
259 [OP_LD16] = cstr("LD16 "), 292 [OP_LD8I] = cstr("LD8I"),
260 [OP_LD32] = cstr("LD32 "), 293 [OP_LD16I] = cstr("LD16I"),
261 [OP_LD64] = cstr("LD64 "), 294 [OP_LD32I] = cstr("LD32I"),
295 [OP_LD64I] = cstr("LD64I"),
296 [OP_LD8] = cstr("LD8"),
297 [OP_LD16] = cstr("LD16"),
298 [OP_LD32] = cstr("LD32"),
299 [OP_LD64] = cstr("LD64"),
262 // Store ops. 300 // Store ops.
263 [OP_ST8I] = cstr("ST8I "), 301 [OP_ST8I] = cstr("ST8I"),
264 [OP_ST16I] = cstr("ST16I "), 302 [OP_ST16I] = cstr("ST16I"),
265 [OP_ST32I] = cstr("ST32I "), 303 [OP_ST32I] = cstr("ST32I"),
266 [OP_ST64I] = cstr("ST64I "), 304 [OP_ST64I] = cstr("ST64I"),
267 [OP_ST8] = cstr("ST8 "), 305 [OP_ST8] = cstr("ST8"),
268 [OP_ST16] = cstr("ST16 "), 306 [OP_ST16] = cstr("ST16"),
269 [OP_ST32] = cstr("ST32 "), 307 [OP_ST32] = cstr("ST32"),
270 [OP_ST64] = cstr("ST64 "), 308 [OP_ST64] = cstr("ST64"),
271 // Arithmetic. 309 // Arithmetic.
272 [OP_ADDI] = cstr("ADDI "), 310 [OP_ADDI] = cstr("ADDI"),
273 [OP_SUBI] = cstr("SUBI "), 311 [OP_SUBI] = cstr("SUBI"),
274 [OP_MULI] = cstr("MULI "), 312 [OP_MULI] = cstr("MULI"),
275 [OP_DIVI] = cstr("DIVI "), 313 [OP_DIVI] = cstr("DIVI"),
276 [OP_MODI] = cstr("MODI "), 314 [OP_MODI] = cstr("MODI"),
277 [OP_ADD] = cstr("ADD "), 315 [OP_ADD] = cstr("ADD"),
278 [OP_SUB] = cstr("SUB "), 316 [OP_SUB] = cstr("SUB"),
279 [OP_MUL] = cstr("MUL "), 317 [OP_MUL] = cstr("MUL"),
280 [OP_DIV] = cstr("DIV "), 318 [OP_DIV] = cstr("DIV"),
281 [OP_MOD] = cstr("MOD "), 319 [OP_MOD] = cstr("MOD"),
282 [OP_ADDFI] = cstr("ADDFI "), 320 [OP_ADDFI] = cstr("ADDFI"),
283 [OP_SUBFI] = cstr("SUBFI "), 321 [OP_SUBFI] = cstr("SUBFI"),
284 [OP_MULFI] = cstr("MULFI "), 322 [OP_MULFI] = cstr("MULFI"),
285 [OP_DIVFI] = cstr("DIVFI "), 323 [OP_DIVFI] = cstr("DIVFI"),
286 [OP_MODFI] = cstr("MODFI "), 324 [OP_MODFI] = cstr("MODFI"),
287 [OP_ADDF] = cstr("ADDF "), 325 [OP_ADDF] = cstr("ADDF"),
288 [OP_SUBF] = cstr("SUBF "), 326 [OP_SUBF] = cstr("SUBF"),
289 [OP_MULF] = cstr("MULF "), 327 [OP_MULF] = cstr("MULF"),
290 [OP_DIVF] = cstr("DIVF "), 328 [OP_DIVF] = cstr("DIVF"),
291 // Reg copy/move. 329 // Reg copy/move.
292 [OP_MODF] = cstr("MODF "), 330 [OP_MODF] = cstr("MODF"),
293 [OP_MOV8] = cstr("MOV8 "), 331 [OP_MOV8] = cstr("MOV8"),
294 [OP_MOV16] = cstr("MOV16 "), 332 [OP_MOV16] = cstr("MOV16"),
295 [OP_MOV32] = cstr("MOV32 "), 333 [OP_MOV32] = cstr("MOV32"),
296 [OP_MOV64] = cstr("MOV64 "), 334 [OP_MOV64] = cstr("MOV64"),
335 [OP_MOV8I] = cstr("MOV8I"),
336 [OP_MOV16I] = cstr("MOV16I"),
337 [OP_MOV32I] = cstr("MOV32I"),
338 [OP_MOV64I] = cstr("MOV64I"),
297 // Logic operations. 339 // Logic operations.
298 [OP_EQI] = cstr("EQI "), 340 [OP_EQI] = cstr("EQI"),
299 [OP_NEQI] = cstr("NEQI "), 341 [OP_NEQI] = cstr("NEQI"),
300 [OP_LTI] = cstr("LTI "), 342 [OP_LTI] = cstr("LTI"),
301 [OP_GTI] = cstr("GTI "), 343 [OP_GTI] = cstr("GTI"),
302 [OP_LEI] = cstr("LEI "), 344 [OP_LEI] = cstr("LEI"),
303 [OP_GEI] = cstr("GEI "), 345 [OP_GEI] = cstr("GEI"),
304 [OP_ANDI] = cstr("ANDI "), 346 [OP_ANDI] = cstr("ANDI"),
305 [OP_ORI] = cstr("ORI "), 347 [OP_ORI] = cstr("ORI"),
306 [OP_NOTI] = cstr("NOTI "), 348 [OP_NOTI] = cstr("NOTI"),
307 [OP_EQ] = cstr("EQ "), 349 [OP_EQ] = cstr("EQ"),
308 [OP_NEQ] = cstr("NEQ "), 350 [OP_NEQ] = cstr("NEQ"),
309 [OP_LT] = cstr("LT "), 351 [OP_LT] = cstr("LT"),
310 [OP_GT] = cstr("GT "), 352 [OP_GT] = cstr("GT"),
311 [OP_LE] = cstr("LE "), 353 [OP_LE] = cstr("LE"),
312 [OP_GE] = cstr("GE "), 354 [OP_GE] = cstr("GE"),
313 [OP_AND] = cstr("AND "), 355 [OP_AND] = cstr("AND"),
314 [OP_OR] = cstr("OR "), 356 [OP_OR] = cstr("OR"),
315 [OP_NOT] = cstr("NOT "), 357 [OP_NOT] = cstr("NOT"),
316 // Bitwise operations. 358 // Bitwise operations.
317 [OP_BITLSHIFTI] = cstr("LSHI "), 359 [OP_BITLSHIFTI] = cstr("LSHI"),
318 [OP_BITRSHIFTI] = cstr("RSHI "), 360 [OP_BITRSHIFTI] = cstr("RSHI"),
319 [OP_BITANDI] = cstr("BANDI "), 361 [OP_BITANDI] = cstr("BANDI"),
320 [OP_BITORI] = cstr("BORI "), 362 [OP_BITORI] = cstr("BORI"),
321 [OP_BITXORI] = cstr("BXORI "), 363 [OP_BITXORI] = cstr("BXORI"),
322 [OP_BITNOTI] = cstr("BNOTI "), 364 [OP_BITNOTI] = cstr("BNOTI"),
323 [OP_BITLSHIFT] = cstr("LSH "), 365 [OP_BITLSHIFT] = cstr("LSH"),
324 [OP_BITRSHIFT] = cstr("RSH "), 366 [OP_BITRSHIFT] = cstr("RSH"),
325 [OP_BITAND] = cstr("BAND "), 367 [OP_BITAND] = cstr("BAND"),
326 [OP_BITOR] = cstr("BOR "), 368 [OP_BITOR] = cstr("BOR"),
327 [OP_BITXOR] = cstr("XBOR "), 369 [OP_BITXOR] = cstr("XBOR"),
328 [OP_BITNOT] = cstr("BNOT "), 370 [OP_BITNOT] = cstr("BNOT"),
329 // Jump instructions. 371 // Jump instructions.
330 [OP_JMP] = cstr("JMP "), 372 [OP_JMP] = cstr("JMP"),
331 [OP_JMPF] = cstr("JMPF "), 373 [OP_JMPF] = cstr("JMPF"),
332 [OP_JMPT] = cstr("JMPT "), 374 [OP_JMPT] = cstr("JMPT"),
333 [OP_JMPFI] = cstr("JMPFI "), 375 [OP_JMPFI] = cstr("JMPFI"),
334 [OP_JMPTI] = cstr("JMPTI "), 376 [OP_JMPTI] = cstr("JMPTI"),
335}; 377};
336 378
337typedef enum { 379typedef enum {
@@ -349,6 +391,7 @@ typedef struct CompResult {
349} CompResult; 391} CompResult;
350 392
351CompResult compile_expr(Compiler *compiler, Chunk *chunk, Node *node); 393CompResult compile_expr(Compiler *compiler, Chunk *chunk, Node *node);
394void disassemble_chunk(Chunk chunk);
352 395
353sz 396sz
354add_constant(Chunk *chunk, sz value) { 397add_constant(Chunk *chunk, sz value) {
@@ -375,37 +418,6 @@ add_string(Chunk *chunk, Str string) {
375 return map->val; 418 return map->val;
376} 419}
377 420
378sz
379add_variable(Chunk *chunk, Str name, Str type, sz arr_size) {
380 sz idx = array_size(chunk->vars);
381 sz size = 8;
382 // TODO: get type storage from a table to consider all the basic
383 // types as well as user defined ones.
384 if (str_eq(type, cstr("str"))) {
385 size = 16;
386 }
387 if (arr_size) {
388 // TODO: get the proper storage size for the multiplication.
389 size *= arr_size;
390 // FIXME: this should be done on the static analysis, plus,
391 // we shouldn't be checking all these types by hand, but
392 // using the symbol tables.
393 type = str_remove_prefix(type, cstr("@"));
394 type = str_concat(cstr("[]"), type, chunk->storage);
395 }
396 Variable var = (Variable){
397 .name = name,
398 .type = type,
399 .size = size,
400 .offset = chunk->var_off,
401 .idx = idx,
402 };
403 varmap_insert(&chunk->varmap, name, var, chunk->storage);
404 array_push(chunk->vars, var, chunk->storage);
405 chunk->var_off += size;
406 return idx;
407}
408
409void 421void
410emit_op(OpCode op, sz dst, sz a, sz b, Node *node, Chunk *chunk) { 422emit_op(OpCode op, sz dst, sz a, sz b, Node *node, Chunk *chunk) {
411 Instruction inst = (Instruction){ 423 Instruction inst = (Instruction){
@@ -423,6 +435,28 @@ emit_op(OpCode op, sz dst, sz a, sz b, Node *node, Chunk *chunk) {
423} 435}
424 436
425void 437void
438emit_sized_op(sz size,
439 OpCode op64,
440 OpCode op32,
441 OpCode op16,
442 OpCode op8,
443 sz dst,
444 sz a,
445 sz b,
446 Node *node,
447 Chunk *chunk) {
448 if (size == 8) {
449 emit_op(op64, dst, a, b, node, chunk);
450 } else if (size == 4) {
451 emit_op(op32, dst, a, b, node, chunk);
452 } else if (size == 2) {
453 emit_op(op16, dst, a, b, node, chunk);
454 } else if (size == 1) {
455 emit_op(op8, dst, a, b, node, chunk);
456 }
457}
458
459void
426emit_fat_copy(Chunk *chunk, Node *node, sz dst_addr, sz src_addr) { 460emit_fat_copy(Chunk *chunk, Node *node, sz dst_addr, sz src_addr) {
427 sz reg_dst = chunk->reg_idx++; 461 sz reg_dst = chunk->reg_idx++;
428 462
@@ -438,8 +472,6 @@ emit_fat_copy(Chunk *chunk, Node *node, sz dst_addr, sz src_addr) {
438 emit_op(OP_ST64I, reg_dst, dst_addr, one, node, chunk); 472 emit_op(OP_ST64I, reg_dst, dst_addr, one, node, chunk);
439} 473}
440 474
441void disassemble_chunk(Chunk chunk);
442
443void 475void
444emit_compile_err(Compiler *compiler, Chunk *chunk, Node *node) { 476emit_compile_err(Compiler *compiler, Chunk *chunk, Node *node) {
445 disassemble_chunk(*chunk); 477 disassemble_chunk(*chunk);
@@ -452,50 +484,50 @@ CompResult
452compile_binary(Compiler *compiler, Chunk *chunk, Node *node) { 484compile_binary(Compiler *compiler, Chunk *chunk, Node *node) {
453 OpCode op = OP_HALT; 485 OpCode op = OP_HALT;
454 OpCode opi = OP_HALT; 486 OpCode opi = OP_HALT;
455 OpCode ldop = OP_LD64K; 487 OpCode ldop = OP_LDCONST;
456 switch (node->kind) { 488 switch (node->kind) {
457 // Arithmetic. 489 // Arithmetic.
458 case NODE_ADD: { 490 case NODE_ADD: {
459 if (str_eq(node->type, cstr("int"))) { 491 if (strset_lookup(&compiler->integer_types, node->type)) {
460 op = OP_ADD; 492 op = OP_ADD;
461 opi = OP_ADDI; 493 opi = OP_ADDI;
462 } else if (str_eq(node->type, cstr("f64"))) { 494 } else if (strset_lookup(&compiler->float_types, node->type)) {
463 op = OP_ADDF; 495 op = OP_ADDF;
464 opi = OP_ADDFI; 496 opi = OP_ADDFI;
465 } 497 }
466 } break; 498 } break;
467 case NODE_SUB: { 499 case NODE_SUB: {
468 if (str_eq(node->type, cstr("int"))) { 500 if (strset_lookup(&compiler->integer_types, node->type)) {
469 op = OP_SUB; 501 op = OP_SUB;
470 opi = OP_SUBI; 502 opi = OP_SUBI;
471 } else if (str_eq(node->type, cstr("f64"))) { 503 } else if (strset_lookup(&compiler->float_types, node->type)) {
472 op = OP_SUBF; 504 op = OP_SUBF;
473 opi = OP_SUBFI; 505 opi = OP_SUBFI;
474 } 506 }
475 } break; 507 } break;
476 case NODE_MUL: { 508 case NODE_MUL: {
477 if (str_eq(node->type, cstr("int"))) { 509 if (strset_lookup(&compiler->integer_types, node->type)) {
478 op = OP_MUL; 510 op = OP_MUL;
479 opi = OP_MULI; 511 opi = OP_MULI;
480 } else if (str_eq(node->type, cstr("f64"))) { 512 } else if (strset_lookup(&compiler->float_types, node->type)) {
481 op = OP_MULF; 513 op = OP_MULF;
482 opi = OP_MULFI; 514 opi = OP_MULFI;
483 } 515 }
484 } break; 516 } break;
485 case NODE_DIV: { 517 case NODE_DIV: {
486 if (str_eq(node->type, cstr("int"))) { 518 if (strset_lookup(&compiler->integer_types, node->type)) {
487 op = OP_DIV; 519 op = OP_DIV;
488 opi = OP_DIVI; 520 opi = OP_DIVI;
489 } else if (str_eq(node->type, cstr("f64"))) { 521 } else if (strset_lookup(&compiler->float_types, node->type)) {
490 op = OP_DIVF; 522 op = OP_DIVF;
491 opi = OP_DIVFI; 523 opi = OP_DIVFI;
492 } 524 }
493 } break; 525 } break;
494 case NODE_MOD: { 526 case NODE_MOD: {
495 if (str_eq(node->type, cstr("int"))) { 527 if (strset_lookup(&compiler->integer_types, node->type)) {
496 op = OP_MOD; 528 op = OP_MOD;
497 opi = OP_MODI; 529 opi = OP_MODI;
498 } else if (str_eq(node->type, cstr("f64"))) { 530 } else if (strset_lookup(&compiler->float_types, node->type)) {
499 op = OP_MODF; 531 op = OP_MODF;
500 opi = OP_MODFI; 532 opi = OP_MODFI;
501 } 533 }
@@ -525,14 +557,6 @@ compile_binary(Compiler *compiler, Chunk *chunk, Node *node) {
525 op = OP_GE; 557 op = OP_GE;
526 opi = OP_GEI; 558 opi = OP_GEI;
527 } break; 559 } break;
528 case NODE_AND: {
529 op = OP_AND;
530 opi = OP_ANDI;
531 } break;
532 case NODE_OR: {
533 op = OP_OR;
534 opi = OP_ORI;
535 } break;
536 // Bitwise. 560 // Bitwise.
537 case NODE_BITOR: { 561 case NODE_BITOR: {
538 op = OP_BITOR; 562 op = OP_BITOR;
@@ -592,6 +616,88 @@ compile_binary(Compiler *compiler, Chunk *chunk, Node *node) {
592} 616}
593 617
594CompResult 618CompResult
619compile_binary_logic(Compiler *compiler, Chunk *chunk, Node *node) {
620 // Logical functions have to shortcircuit once the answer is known.
621 OpCode op = OP_HALT;
622 OpCode opi = OP_HALT;
623 OpCode ldop = OP_LDCONST;
624 OpCode jmpop = OP_HALT;
625 OpCode jmpopi = OP_HALT;
626 bool default_value = false;
627 switch (node->kind) {
628 case NODE_AND: {
629 op = OP_AND;
630 opi = OP_ANDI;
631 jmpop = OP_JMPF;
632 jmpopi = OP_JMPFI;
633 default_value = false;
634 } break;
635 case NODE_OR: {
636 op = OP_OR;
637 opi = OP_ORI;
638 jmpop = OP_JMPT;
639 jmpopi = OP_JMPTI;
640 default_value = true;
641 } break;
642 default: break;
643 }
644
645 sz lab0 = chunk->labels_idx++;
646 sz lab1 = chunk->labels_idx++;
647
648 CompResult comp_a = compile_expr(compiler, chunk, node->binary.left);
649 sz reg_a;
650 switch (comp_a.type) {
651 case COMP_CONST: {
652 emit_op(jmpopi, lab0, comp_a.idx, 0, node->binary.left, chunk);
653 reg_a = chunk->reg_idx++;
654 emit_op(ldop, reg_a, comp_a.idx, 0, node, chunk);
655 } break;
656 case COMP_REG: {
657 emit_op(jmpop, lab0, comp_a.idx, 0, node->binary.left, chunk);
658 reg_a = comp_a.idx;
659 } break;
660 default: {
661 emit_compile_err(compiler, chunk, node);
662 return (CompResult){.type = COMP_ERR};
663 } break;
664 }
665
666 CompResult comp_b = compile_expr(compiler, chunk, node->binary.right);
667 sz reg_b;
668 switch (comp_b.type) {
669 case COMP_CONST: {
670 reg_b = comp_b.idx;
671 op = opi;
672 } break;
673 case COMP_REG: {
674 reg_b = comp_b.idx;
675 } break;
676 default: {
677 emit_compile_err(compiler, chunk, node);
678 return (CompResult){.type = COMP_ERR};
679 } break;
680 }
681 sz reg_dst = chunk->reg_idx++;
682 emit_op(op, reg_dst, reg_a, reg_b, node, chunk);
683
684 // Jump to the end of this comparison.
685 emit_op(OP_JMP, lab1, 0, 0, node, chunk);
686 sz pos0 = array_size(chunk->code);
687
688 // Load default value.
689 sz defaul_const = add_constant(chunk, default_value);
690 emit_op(ldop, reg_dst, defaul_const, 0, node, chunk);
691 sz pos1 = array_size(chunk->code);
692
693 // Register labels.
694 intintmap_insert(&chunk->labels, lab0, pos0, chunk->storage);
695 intintmap_insert(&chunk->labels, lab1, pos1, chunk->storage);
696
697 return (CompResult){.type = COMP_REG, .idx = reg_dst};
698}
699
700CompResult
595compile_unary(Compiler *compiler, Chunk *chunk, Node *node) { 701compile_unary(Compiler *compiler, Chunk *chunk, Node *node) {
596 OpCode op = OP_HALT; 702 OpCode op = OP_HALT;
597 OpCode opi = OP_HALT; 703 OpCode opi = OP_HALT;
@@ -657,8 +763,8 @@ compile_if(Compiler *compiler, Chunk *chunk, Node *node) {
657 compile_expr(compiler, chunk, node->ifelse.expr_true); 763 compile_expr(compiler, chunk, node->ifelse.expr_true);
658 switch (then_expr.type) { 764 switch (then_expr.type) {
659 case COMP_CONST: { 765 case COMP_CONST: {
660 emit_op(OP_LD64K, reg_dst, then_expr.idx, 0, node->ifelse.cond, 766 emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0,
661 chunk); 767 node->ifelse.cond, chunk);
662 } break; 768 } break;
663 case COMP_REG: { 769 case COMP_REG: {
664 emit_op(OP_MOV64, reg_dst, then_expr.idx, 0, node->ifelse.cond, 770 emit_op(OP_MOV64, reg_dst, then_expr.idx, 0, node->ifelse.cond,
@@ -681,7 +787,7 @@ compile_if(Compiler *compiler, Chunk *chunk, Node *node) {
681 compile_expr(compiler, chunk, node->ifelse.expr_else); 787 compile_expr(compiler, chunk, node->ifelse.expr_else);
682 switch (else_expr.type) { 788 switch (else_expr.type) {
683 case COMP_CONST: { 789 case COMP_CONST: {
684 emit_op(OP_LD64K, reg_dst, else_expr.idx, 0, 790 emit_op(OP_LDCONST, reg_dst, else_expr.idx, 0,
685 node->ifelse.expr_else, chunk); 791 node->ifelse.expr_else, chunk);
686 } break; 792 } break;
687 case COMP_REG: { 793 case COMP_REG: {
@@ -803,7 +909,7 @@ compile_cond(Compiler *compiler, Chunk *chunk, Node *node) {
803 compile_expr(compiler, chunk, expr->case_entry.expr); 909 compile_expr(compiler, chunk, expr->case_entry.expr);
804 switch (then_expr.type) { 910 switch (then_expr.type) {
805 case COMP_CONST: { 911 case COMP_CONST: {
806 emit_op(OP_LD64K, reg_dst, then_expr.idx, 0, 912 emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0,
807 expr->case_entry.expr, chunk); 913 expr->case_entry.expr, chunk);
808 } break; 914 } break;
809 case COMP_REG: { 915 case COMP_REG: {
@@ -828,7 +934,7 @@ compile_cond(Compiler *compiler, Chunk *chunk, Node *node) {
828 compile_expr(compiler, chunk, expr->case_entry.expr); 934 compile_expr(compiler, chunk, expr->case_entry.expr);
829 switch (then_expr.type) { 935 switch (then_expr.type) {
830 case COMP_CONST: { 936 case COMP_CONST: {
831 emit_op(OP_LD64K, reg_dst, then_expr.idx, 0, 937 emit_op(OP_LDCONST, reg_dst, then_expr.idx, 0,
832 expr->case_entry.expr, chunk); 938 expr->case_entry.expr, chunk);
833 } break; 939 } break;
834 case COMP_REG: { 940 case COMP_REG: {
@@ -900,85 +1006,268 @@ compile_while(Compiler *compiler, Chunk *chunk, Node *node) {
900} 1006}
901 1007
902CompResult 1008CompResult
1009compile_tail_call(Compiler *compiler, Chunk *chunk, Node *node, sz fun_idx) {
1010 // Update the local parameters.
1011 for (sz i = 0; i < array_size(node->elements); i++) {
1012 Node *expr = node->elements[i];
1013 CompResult result = compile_expr(compiler, chunk, expr);
1014 switch (result.type) {
1015 case COMP_CONST: {
1016 emit_op(OP_STLVARI, i, result.idx, 0, node, chunk);
1017 } break;
1018 case COMP_REG: {
1019 if (str_eq(expr->type, cstr("Str"))) {
1020 sz var_addr = chunk->reg_idx++;
1021 sz str_addr = result.idx;
1022 emit_op(OP_LDLADDR, var_addr, i, 0, node, chunk);
1023 emit_fat_copy(chunk, node, var_addr, str_addr);
1024 } else {
1025 emit_op(OP_STLVAR, i, result.idx, 0, node, chunk);
1026 }
1027 } break;
1028 case COMP_STRING: {
1029 sz var_addr = chunk->reg_idx++;
1030 sz str_addr = chunk->reg_idx++;
1031 emit_op(OP_LDLADDR, var_addr, i, 0, node, chunk);
1032 emit_op(OP_LDSTR, str_addr, result.idx, 0, node, chunk);
1033 emit_fat_copy(chunk, node, var_addr, str_addr);
1034 } break;
1035 default: {
1036 emit_compile_err(compiler, chunk, node);
1037 return (CompResult){.type = COMP_ERR};
1038 } break;
1039 }
1040 }
1041
1042 if (fun_idx == chunk->fun.index) {
1043 emit_op(OP_RECUR_SELF, fun_idx, 0, 0, node, chunk);
1044 } else {
1045 emit_op(OP_RECUR, fun_idx, 0, 0, node, chunk);
1046 }
1047 return (CompResult){.type = COMP_NIL};
1048}
1049
1050CompResult
903compile_funcall(Compiler *compiler, Chunk *chunk, Node *node) { 1051compile_funcall(Compiler *compiler, Chunk *chunk, Node *node) {
904 Str name = node->value.str; 1052 // Get variable information.
1053 Str name = node->unique_name;
1054 SymbolMap *map = symmap_lookup(&compiler->symbols, name);
905 1055
906 // Builtins. 1056 // Builtins.
907 if (str_eq(name, cstr("print")) || str_eq(name, cstr("println"))) { 1057 if (map->val.kind == SYM_BUILTIN_FUN) {
908 for (sz i = 0; i < array_size(node->elements); i++) { 1058 if (str_eq(name, cstr("print")) || str_eq(name, cstr("println"))) {
909 Node *expr = node->elements[i]; 1059 for (sz i = 0; i < array_size(node->elements); i++) {
910 CompResult result = compile_expr(compiler, chunk, expr); 1060 Node *expr = node->elements[i];
911 if (str_eq(expr->type, cstr("int"))) { 1061 Str type_name = expr->type;
912 switch (result.type) { 1062 // NOTE: slices [] and dynamic arrays [...] should be treated
913 case COMP_CONST: { 1063 // differently from static arrays.
914 emit_op(OP_PRINTS64I, result.idx, 0, 0, expr, chunk); 1064 if (str_has_prefix(type_name, cstr("@")) ||
915 } break; 1065 str_has_prefix(type_name, cstr("["))) {
916 case COMP_REG: { 1066 type_name = cstr("Ptr");
917 emit_op(OP_PRINTS64, result.idx, 0, 0, expr, chunk);
918 } break;
919 default: {
920 emit_compile_err(compiler, chunk, node);
921 return (CompResult){.type = COMP_ERR};
922 } break;
923 }
924 } else if (str_eq(expr->type, cstr("f64"))) {
925 switch (result.type) {
926 case COMP_CONST: {
927 emit_op(OP_PRINTF64I, result.idx, 0, 0, expr, chunk);
928 } break;
929 case COMP_REG: {
930 emit_op(OP_PRINTF64, result.idx, 0, 0, expr, chunk);
931 } break;
932 default: {
933 emit_compile_err(compiler, chunk, node);
934 return (CompResult){.type = COMP_ERR};
935 } break;
936 }
937 } else if (str_eq(expr->type, cstr("str"))) {
938 switch (result.type) {
939 case COMP_STRING: {
940 emit_op(OP_PRINTSTRI, result.idx, 0, 0, expr, chunk);
941 } break;
942 case COMP_REG: {
943 emit_op(OP_PRINTSTR, result.idx, 0, 0, expr, chunk);
944 } break;
945 default: {
946 emit_compile_err(compiler, chunk, node);
947 return (CompResult){.type = COMP_ERR};
948 } break;
949 } 1067 }
950 } else if (str_eq(expr->type, cstr("bool"))) { 1068 SymbolMap *map = symmap_lookup(&compiler->symbols, type_name);
951 switch (result.type) { 1069 sz type_size = map->val.t.size;
952 case COMP_CONST: { 1070 CompResult result = compile_expr(compiler, chunk, expr);
953 emit_op(OP_PRINTBOOLI, result.idx, 0, 0, expr, chunk); 1071 if (strset_lookup(&compiler->sint_types, type_name)) {
954 } break; 1072 switch (result.type) {
955 case COMP_REG: { 1073 case COMP_CONST: {
956 emit_op(OP_PRINTBOOL, result.idx, 0, 0, expr, chunk); 1074 emit_sized_op(type_size, OP_PRINTS64I, OP_PRINTS32I,
957 } break; 1075 OP_PRINTS16I, OP_PRINTS8I, result.idx,
958 default: { 1076 0, 0, expr, chunk);
959 emit_compile_err(compiler, chunk, node); 1077 } break;
960 return (CompResult){.type = COMP_ERR}; 1078 case COMP_REG: {
961 } break; 1079 emit_sized_op(type_size, OP_PRINTS64, OP_PRINTS32,
1080 OP_PRINTS16, OP_PRINTS8, result.idx,
1081 0, 0, expr, chunk);
1082 } break;
1083 default: {
1084 emit_compile_err(compiler, chunk, node);
1085 return (CompResult){.type = COMP_ERR};
1086 } break;
1087 }
1088 } else if (strset_lookup(&compiler->uint_types, type_name)) {
1089 switch (result.type) {
1090 case COMP_CONST: {
1091 emit_sized_op(type_size, OP_PRINTU64I, OP_PRINTU32I,
1092 OP_PRINTU16I, OP_PRINTU8I, result.idx,
1093 0, 0, expr, chunk);
1094 } break;
1095 case COMP_REG: {
1096 emit_sized_op(type_size, OP_PRINTU64, OP_PRINTU32,
1097 OP_PRINTU16, OP_PRINTU8, result.idx,
1098 0, 0, expr, chunk);
1099 } break;
1100 default: {
1101 emit_compile_err(compiler, chunk, node);
1102 return (CompResult){.type = COMP_ERR};
1103 } break;
1104 }
1105 } else if (strset_lookup(&compiler->float_types, type_name)) {
1106 switch (result.type) {
1107 case COMP_CONST: {
1108 if (type_size == 8) {
1109 emit_op(OP_PRINTF64I, result.idx, 0, 0, expr,
1110 chunk);
1111 } else {
1112 emit_op(OP_PRINTF32I, result.idx, 0, 0, expr,
1113 chunk);
1114 }
1115 } break;
1116 case COMP_REG: {
1117 if (type_size == 8) {
1118 emit_op(OP_PRINTF64, result.idx, 0, 0, expr,
1119 chunk);
1120 } else {
1121 emit_op(OP_PRINTF32, result.idx, 0, 0, expr,
1122 chunk);
1123 }
1124 } break;
1125 default: {
1126 emit_compile_err(compiler, chunk, node);
1127 return (CompResult){.type = COMP_ERR};
1128 } break;
1129 }
1130 } else if (str_eq(type_name, cstr("Str"))) {
1131 switch (result.type) {
1132 case COMP_STRING: {
1133 emit_op(OP_PRINTSTRI, result.idx, 0, 0, expr,
1134 chunk);
1135 } break;
1136 case COMP_REG: {
1137 emit_op(OP_PRINTSTR, result.idx, 0, 0, expr, chunk);
1138 } break;
1139 default: {
1140 emit_compile_err(compiler, chunk, node);
1141 return (CompResult){.type = COMP_ERR};
1142 } break;
1143 }
1144 } else if (str_eq(type_name, cstr("Bool"))) {
1145 switch (result.type) {
1146 case COMP_CONST: {
1147 emit_op(OP_PRINTBOOLI, result.idx, 0, 0, expr,
1148 chunk);
1149 } break;
1150 case COMP_REG: {
1151 emit_op(OP_PRINTBOOL, result.idx, 0, 0, expr,
1152 chunk);
1153 } break;
1154 default: {
1155 emit_compile_err(compiler, chunk, node);
1156 return (CompResult){.type = COMP_ERR};
1157 } break;
1158 }
962 } 1159 }
963 } 1160 }
1161 if (str_eq(name, cstr("println"))) {
1162 sz idx = add_string(chunk, cstr("\n"));
1163 emit_op(OP_PRINTSTRI, idx, 0, 0, node, chunk);
1164 }
1165 return (CompResult){.type = COMP_NIL};
1166 } else if (str_eq(name, cstr("sizeof"))) {
1167 // Find size of the given type.
1168 // Node *expr = node->elements[0];
1169 // Str type_name = expr->value.str;
1170 // // Try to find the type on the table.
1171 // TypeMap *t = typemap_lookup(&compiler->type_map, type_name);
1172 // sz size = 0;
1173 // if (t) {
1174 // size = t->val.size;
1175 // } else {
1176 // // If that's not possible, try resolving the symbol.
1177 // Str name = expr->unique_name;
1178 // VarMap *map = NULL;
1179 // Chunk *next = chunk;
1180 // while (next) {
1181 // map = varmap_lookup(&next->varmap, name);
1182 // if (map) {
1183 // break;
1184 // }
1185 // next = next->parent;
1186 // }
1187 // if (!map) {
1188 // emit_compile_err(compiler, chunk, expr);
1189 // return (CompResult){.type = COMP_ERR};
1190 // }
1191 // Variable var = map->val;
1192 // size = var_size;
1193
1194 // FIXME: type size and tables is better done on semantic
1195 // analyzer,
1196 // enough bloat here already.
1197 // sz reg_dst = chunk->reg_idx++;
1198 // sz const_idx = add_constant(chunk, size);
1199 // emit_op(OP_LDCONST, reg_dst, const_idx, 0, node, chunk);
1200 // return (CompResult){.type = COMP_REG, .idx = reg_dst};
1201 // }
964 } 1202 }
965 if (str_eq(name, cstr("println"))) { 1203 } else if (map->val.kind == SYM_GLOBALVAR ||
966 sz idx = add_string(chunk, cstr("\n")); 1204 map->val.kind == SYM_LOCALVAR) {
967 emit_op(OP_PRINTSTRI, idx, 0, 0, node, chunk); 1205 // TODO: It's a function pointer.
968 }
969 return (CompResult){.type = COMP_NIL};
970 } 1206 }
971 1207
972 FunctionMap *map = 1208 // Check for tail recursive opportunities.
973 funcmap_lookup(&compiler->main_chunk.funmap, node->unique_name); 1209 Function *fun = map->val.fun;
974 if (!map) { 1210 if (str_eq(fun->type, chunk->fun.type) && chunk->id != 0) {
975 emit_compile_err(compiler, chunk, node); 1211 Node *parent = node->parent;
976 return (CompResult){.type = COMP_ERR}; 1212 Node *current = node;
1213 bool tail_recursive = true;
1214 while (parent != NULL) {
1215 switch (parent->kind) {
1216 case NODE_BLOCK: {
1217 sz idx = array_size(parent->statements) - 1;
1218 if (parent->statements[idx] != node) {
1219 tail_recursive = false;
1220 break;
1221 }
1222 } break;
1223 case NODE_WHILE: {
1224 if (current == parent->loop.cond) {
1225 tail_recursive = false;
1226 break;
1227 }
1228 } break;
1229 case NODE_IF: {
1230 if (current == parent->ifelse.cond) {
1231 tail_recursive = false;
1232 break;
1233 }
1234 } break;
1235 case NODE_FUN: {
1236 sz idx = array_size(parent->func.body->statements) - 1;
1237 if (parent->func.body->statements[idx] != current) {
1238 tail_recursive = false;
1239 break;
1240 }
1241 break;
1242 } break;
1243 case NODE_MATCH: {
1244 if (current == parent->match.expr) {
1245 tail_recursive = false;
1246 break;
1247 }
1248 } break;
1249 case NODE_COND: break;
1250 case NODE_CASE_COND: {
1251 if (current == parent->case_entry.cond) {
1252 tail_recursive = false;
1253 break;
1254 }
1255 } break;
1256 default: {
1257 tail_recursive = false;
1258 break;
1259 } break;
1260 }
1261 parent = parent->parent;
1262 current = current->parent;
1263 }
1264 if (tail_recursive) {
1265 return compile_tail_call(compiler, chunk, node, fun->index);
1266 }
977 } 1267 }
978 Function fun = map->val;
979 1268
980 // Reserve space for the return value if needed. 1269 // Reserve space for the return value if needed.
981 if (fun.return_arity > 0) { 1270 if (fun->return_arity > 0) {
982 // Put the return data into a register 1271 // Put the return data into a register
983 sz ret_size = add_constant(chunk, 8); 1272 sz ret_size = add_constant(chunk, 8);
984 emit_op(OP_RESERVE, ret_size, 0, 0, node, chunk); 1273 emit_op(OP_RESERVE, ret_size, 0, 0, node, chunk);
@@ -988,20 +1277,27 @@ compile_funcall(Compiler *compiler, Chunk *chunk, Node *node) {
988 for (sz i = 0; i < array_size(node->elements); i++) { 1277 for (sz i = 0; i < array_size(node->elements); i++) {
989 Node *expr = node->elements[i]; 1278 Node *expr = node->elements[i];
990 CompResult result = compile_expr(compiler, chunk, expr); 1279 CompResult result = compile_expr(compiler, chunk, expr);
1280 Str type_name = expr->type;
1281 SymbolMap *map = symmap_lookup(&compiler->symbols, type_name);
1282 sz type_size = map->val.t.size;
1283
991 switch (result.type) { 1284 switch (result.type) {
992 case COMP_CONST: { 1285 case COMP_CONST: {
993 emit_op(OP_PUSHI, result.idx, 0, 0, expr, chunk); 1286 emit_op(OP_PUSHI, result.idx, 0, 0, expr, chunk);
994 } break; 1287 } break;
995 case COMP_REG: { 1288 case COMP_REG: {
996 if (str_eq(expr->type, cstr("str"))) { 1289 if (str_eq(expr->type, cstr("Str"))) {
997 sz str_addr = result.idx; 1290 sz str_addr = result.idx;
998 // Store the fat string pointer into the stack. 1291 // Store the fat string pointer into the stack.
999 sz reg_dst = chunk->reg_idx++; 1292 sz reg_dst = chunk->reg_idx++;
1000 sz zero = add_constant(chunk, 0); 1293 sz zero = add_constant(chunk, 0);
1001 sz one = add_constant(chunk, 1); 1294 sz one = add_constant(chunk, 1);
1002 emit_op(OP_LD64I, reg_dst, str_addr, zero, node, chunk); 1295 emit_sized_op(type_size, OP_LD64I, OP_LD32I, OP_LD16I,
1296 OP_LD8I, reg_dst, str_addr, zero, node,
1297 chunk);
1003 emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); 1298 emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk);
1004 emit_op(OP_LD64I, reg_dst, str_addr, one, node, chunk); 1299 emit_sized_op(type_size, OP_LD64I, OP_LD32I, OP_LD16I,
1300 OP_LD8I, reg_dst, str_addr, one, node, chunk);
1005 emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk); 1301 emit_op(OP_PUSH, reg_dst, 0, 0, expr, chunk);
1006 } else { 1302 } else {
1007 emit_op(OP_PUSH, result.idx, 0, 0, expr, chunk); 1303 emit_op(OP_PUSH, result.idx, 0, 0, expr, chunk);
@@ -1029,10 +1325,11 @@ compile_funcall(Compiler *compiler, Chunk *chunk, Node *node) {
1029 } 1325 }
1030 } 1326 }
1031 1327
1032 emit_op(OP_CALL, fun.index, 0, 0, node, chunk); 1328 emit_op(OP_CALL, fun->index, 0, 0, node, chunk);
1033 1329
1034 // Only one return parameter for now. 1330 // Only one return parameter for now.
1035 if (fun.return_arity > 0) { 1331 if (fun->return_arity > 0) {
1332 // FIXME: This doesn't account for returning a value > WORD_SIZE.
1036 // Put the return data into a register 1333 // Put the return data into a register
1037 sz reg_dst = chunk->reg_idx++; 1334 sz reg_dst = chunk->reg_idx++;
1038 emit_op(OP_POP, reg_dst, 0, 0, node, chunk); 1335 emit_op(OP_POP, reg_dst, 0, 0, node, chunk);
@@ -1071,149 +1368,70 @@ compile_return(Compiler *compiler, Chunk *chunk, Node *node) {
1071} 1368}
1072 1369
1073Chunk * 1370Chunk *
1074chunk_alloc(Chunk *parent) { 1371chunk_alloc(Compiler *compiler) {
1075 static sz chunk_idx = 1; 1372 Chunk *chunk = arena_calloc((sz)sizeof(Chunk), compiler->storage);
1076 Chunk *chunk = arena_calloc((sz)sizeof(Chunk), parent->storage); 1373 chunk->storage = compiler->storage;
1077 chunk->parent = parent; 1374 chunk->file_name = compiler->file_name;
1078 chunk->id = chunk_idx++;
1079 chunk->storage = parent->storage;
1080 chunk->file_name = parent->file_name;
1081 return chunk; 1375 return chunk;
1082} 1376}
1083 1377
1084void 1378void
1085verify_chunk(Chunk *chunk) { 1379verify_chunk(Chunk *chunk) {
1086 if (chunk->const_idx >= 256) { 1380 assert(chunk);
1087 eprintln("too many constants on chunk %s", chunk->id); 1381 if (chunk->const_idx >= 0xFFFF) {
1382 eprintln("too many constants on chunk %d: %d", chunk->id,
1383 chunk->const_idx);
1088 exit(EXIT_FAILURE); 1384 exit(EXIT_FAILURE);
1089 } 1385 }
1090 if (chunk->str_idx >= 256) { 1386 if (chunk->str_idx >= 0xFFFF) {
1091 eprintln("too many strings on chunk %s", chunk->id); 1387 eprintln("too many strings on chunk %d: %d", chunk->id, chunk->str_idx);
1092 exit(EXIT_FAILURE); 1388 exit(EXIT_FAILURE);
1093 } 1389 }
1094 if (chunk->reg_idx >= 256) { 1390 if (chunk->reg_idx >= 0xFFFF) {
1095 eprintln("too many registers used on chunk %s", chunk->id); 1391 eprintln("too many registers used on chunk %d: %d", chunk->id,
1392 chunk->reg_idx);
1096 exit(EXIT_FAILURE); 1393 exit(EXIT_FAILURE);
1097 } 1394 }
1098 if (chunk->labels_idx >= 256) { 1395 if (chunk->labels_idx >= 0xFFFF) {
1099 eprintln("too many labels used on chunk %s", chunk->id); 1396 eprintln("too many labels used on chunk %d: %d", chunk->id,
1397 chunk->labels_idx);
1100 exit(EXIT_FAILURE); 1398 exit(EXIT_FAILURE);
1101 } 1399 }
1102 if (chunk->fun_idx >= 256) {
1103 eprintln("too many functions on chunk %s", chunk->id);
1104 exit(EXIT_FAILURE);
1105 }
1106}
1107
1108void
1109declare_function(Chunk *chunk, Node *node) {
1110 Str name = node->unique_name;
1111 FunctionMap *map = funcmap_lookup(&chunk->funmap, node->unique_name);
1112 if (map) {
1113 return;
1114 }
1115 Function fun = (Function){
1116 .name = name,
1117 .index = chunk->fun_idx++,
1118 .param_arity = array_size(node->func.params),
1119 .return_arity = array_size(node->func.ret),
1120 };
1121 funcmap_insert(&chunk->funmap, node->unique_name, fun, chunk->storage);
1122}
1123
1124CompResult
1125compile_function(Compiler *compiler, Chunk *chunk, Node *node) {
1126 // The current activation record procedure for the VM is as follows:
1127 //
1128 // [caller][callee ]
1129 // [ .... ][ RET VAL ][ PARAMS ][ LOCALS ][ REGISTERS ][ RET META ]
1130 // ^
1131 // frame pointer
1132 //
1133 // The caller is responsible for allocating the return memory and the
1134 // parameter memory and filling the param data before OP_CALL.
1135 //
1136 chunk = &compiler->main_chunk;
1137 Chunk *func = chunk_alloc(chunk);
1138 func->name = node->unique_name;
1139 declare_function(chunk, node);
1140 array_push(chunk->functions, func, chunk->storage);
1141
1142 // Push arguments as locals.
1143 for (sz i = 0; i < array_size(node->func.params); i++) {
1144 Node *param = node->func.params[i];
1145 Str name = param->unique_name;
1146 Str type = param->type;
1147 sz arr_size = 0;
1148 if (str_has_prefix(type, cstr("@"))) {
1149 if (param->var.type && param->var.type->kind == NODE_ARR_TYPE &&
1150 param->var.type->sym.arr_size->value.i > 0) {
1151 arr_size = param->var.type->sym.arr_size->value.i;
1152 }
1153 }
1154 add_variable(func, name, type, arr_size);
1155 }
1156 func->param_off = func->var_off;
1157
1158 // Compiling the body.
1159 CompResult res = compile_expr(compiler, func, node->func.body);
1160
1161 // Put return values into memory.
1162 switch (res.type) {
1163 case COMP_CONST: {
1164 emit_op(OP_PUTRETI, res.idx, 0, 0, node, func);
1165 } break;
1166 case COMP_REG: {
1167 emit_op(OP_PUTRET, res.idx, 0, 0, node, func);
1168 } break;
1169 default: break;
1170 }
1171
1172 emit_op(OP_RET, 0, 0, 0, node, func);
1173 verify_chunk(func);
1174 return (CompResult){.type = COMP_NIL};
1175} 1400}
1176 1401
1177CompResult 1402CompResult
1178compile_let(Compiler *compiler, Chunk *chunk, Node *node) { 1403compile_let(Compiler *compiler, Chunk *chunk, Node *node) {
1179 sz op_ldaddr = OP_LDLADDR; 1404 // Get variable information.
1180 sz op_stvari = OP_STLVARI; 1405 Str name = node->var.name->unique_name;
1181 sz op_stvar = OP_STLVAR; 1406 SymbolMap *map = symmap_lookup(&compiler->symbols, name);
1182 if (chunk == &compiler->main_chunk) { 1407 sz idx = map->val.var.idx;
1183 op_ldaddr = OP_LDGADDR; 1408 sz type_size = map->val.var.type.size;
1184 op_stvari = OP_STGVARI; 1409 sz var_size = map->val.var.size;
1185 op_stvar = OP_STGVAR; 1410 sz op_ldaddr = map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR;
1186 }
1187 Str name = node->unique_name;
1188 Str type = node->var.name->type;
1189 sz arr_size = 0;
1190 if (str_has_prefix(type, cstr("@"))) {
1191 if (node->var.type && node->var.type->kind == NODE_ARR_TYPE &&
1192 node->var.type->sym.arr_size->value.i > 0) {
1193 arr_size = node->var.type->sym.arr_size->value.i;
1194 }
1195 }
1196
1197 sz idx = add_variable(chunk, name, type, arr_size);
1198 1411
1199 // Value. 1412 // Value.
1200 if (node->var.val) { 1413 if (node->var.val) {
1201 CompResult res = compile_expr(compiler, chunk, node->var.val); 1414 CompResult res = compile_expr(compiler, chunk, node->var.val);
1202 switch (res.type) { 1415 switch (res.type) {
1203 case COMP_CONST: { 1416 case COMP_CONST: {
1204 emit_op(op_stvari, idx, res.idx, 0, node->var.val, chunk); 1417 sz reg_addr = chunk->reg_idx++;
1418 sz reg_dst = chunk->reg_idx++;
1419 emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk);
1420 emit_op(OP_LDCONST, reg_dst, res.idx, 0, node, chunk);
1421 emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K,
1422 reg_dst, reg_addr, 0, node, chunk);
1205 } break; 1423 } break;
1206 case COMP_REG: { 1424 case COMP_REG: {
1207 if (str_eq(node->var.val->type, cstr("str"))) { 1425 sz reg_addr = chunk->reg_idx++;
1208 // Get the address for the local/global storage 1426 sz reg_val = res.idx;
1209 // variable. 1427 emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk);
1210 sz var_addr = chunk->reg_idx++; 1428 if (var_size > 8) {
1211 emit_op(op_ldaddr, var_addr, idx, 0, node, chunk); 1429 sz size_const = add_constant(chunk, var_size);
1212 1430 emit_op(OP_MEMCPYI, reg_addr, reg_val, size_const, node,
1213 // Copy the fat pointer. 1431 chunk);
1214 emit_fat_copy(chunk, node, var_addr, res.idx);
1215 } else { 1432 } else {
1216 emit_op(op_stvar, idx, res.idx, 0, node->var.val, chunk); 1433 emit_sized_op(var_size, OP_ST64K, OP_ST32K, OP_ST16K,
1434 OP_ST8K, reg_val, reg_addr, 0, node, chunk);
1217 } 1435 }
1218 } break; 1436 } break;
1219 case COMP_STRING: { 1437 case COMP_STRING: {
@@ -1241,94 +1459,39 @@ compile_let(Compiler *compiler, Chunk *chunk, Node *node) {
1241 1459
1242CompResult 1460CompResult
1243compile_set(Compiler *compiler, Chunk *chunk, Node *node) { 1461compile_set(Compiler *compiler, Chunk *chunk, Node *node) {
1244 Str name = node->unique_name; 1462 CompResult res_name = compile_expr(compiler, chunk, node->var.name);
1245 StrVarMap *map = NULL; 1463 CompResult res_val = compile_expr(compiler, chunk, node->var.val);
1246 Chunk *next = chunk;
1247 while (next) {
1248 map = varmap_lookup(&next->varmap, name);
1249 if (map) {
1250 break;
1251 }
1252 next = chunk->parent;
1253 }
1254 if (!map) {
1255 emit_compile_err(compiler, chunk, node);
1256 return (CompResult){.type = COMP_ERR};
1257 }
1258 sz op_ldaddr = OP_LDLADDR;
1259 sz op_ldvar = OP_LDLVAR;
1260 sz op_stvari = OP_STLVARI;
1261 sz op_stvar = OP_STLVAR;
1262 if (next == &compiler->main_chunk) {
1263 op_ldaddr = OP_LDGADDR;
1264 op_ldvar = OP_LDGVAR;
1265 op_stvari = OP_STGVARI;
1266 op_stvar = OP_STGVAR;
1267 }
1268 CompResult res = compile_expr(compiler, chunk, node->var.val);
1269 if (node->var.name->kind == NODE_SYMBOL_IDX) {
1270 // Value.
1271 sz reg_val;
1272 switch (res.type) {
1273 case COMP_CONST: {
1274 reg_val = chunk->reg_idx++;
1275 emit_op(OP_LD64K, reg_val, res.idx, 0, node, chunk);
1276 } break;
1277 case COMP_REG: {
1278 reg_val = res.idx;
1279 } break;
1280 default: {
1281 emit_compile_err(compiler, chunk, node);
1282 return (CompResult){.type = COMP_ERR};
1283 } break;
1284 }
1285 1464
1286 // Address. 1465 // Get variable information.
1287 sz reg_addr = chunk->reg_idx++; 1466 Str type = node->var.name->type;
1288 // Is this a pointer access or an array access? 1467 SymbolMap *map = symmap_lookup(&compiler->symbols, type);
1289 if (str_has_prefix(map->val.type, cstr("[]"))) { 1468 sz type_size = map->val.t.size;
1290 emit_op(op_ldaddr, reg_addr, map->val.idx, 0, node->var.val, chunk);
1291 } else {
1292 emit_op(op_ldvar, reg_addr, map->val.idx, 0, node->var.val, chunk);
1293 }
1294 1469
1295 // Index. 1470 switch (res_val.type) {
1296 CompResult idx =
1297 compile_expr(compiler, chunk, node->var.name->sym.arr_size);
1298 switch (idx.type) {
1299 case COMP_CONST: {
1300 emit_op(OP_ST64I, reg_val, reg_addr, idx.idx, node, chunk);
1301 } break;
1302 case COMP_REG: {
1303 emit_op(OP_ST64, reg_val, reg_addr, idx.idx, node, chunk);
1304 } break;
1305 default: {
1306 emit_compile_err(compiler, chunk, node);
1307 return (CompResult){.type = COMP_ERR};
1308 } break;
1309 }
1310 // TODO: offset should be in bytes, in this case we are assuming
1311 // 64bit types, hence ST64
1312 return (CompResult){.type = COMP_NIL};
1313 }
1314 switch (res.type) {
1315 case COMP_CONST: { 1471 case COMP_CONST: {
1316 emit_op(op_stvari, map->val.idx, res.idx, 0, node->var.val, chunk); 1472 sz reg_dst = chunk->reg_idx++;
1473 emit_op(OP_LDCONST, reg_dst, res_val.idx, 0, node, chunk);
1474 emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K,
1475 reg_dst, res_name.idx, 0, node, chunk);
1317 } break; 1476 } break;
1318 case COMP_REG: { 1477 case COMP_REG: {
1319 if (str_eq(node->var.val->type, cstr("str"))) { 1478 if (type_size > 8) {
1320 // Get the address for the local/global storage 1479 sz size_const = add_constant(chunk, type_size);
1321 // variable. 1480 emit_op(OP_MEMCPYI, res_name.idx, res_val.idx, size_const, node,
1322 sz var_addr = chunk->reg_idx++;
1323 emit_op(op_ldaddr, var_addr, map->val.idx, 0, node, chunk);
1324
1325 // Copy the fat pointer.
1326 emit_fat_copy(chunk, node, var_addr, res.idx);
1327 } else {
1328 emit_op(op_stvar, map->val.idx, res.idx, 0, node->var.val,
1329 chunk); 1481 chunk);
1482 } else {
1483 emit_sized_op(type_size, OP_ST64K, OP_ST32K, OP_ST16K, OP_ST8K,
1484 res_val.idx, res_name.idx, 0, node, chunk);
1330 } 1485 }
1331 } break; 1486 } break;
1487 case COMP_STRING: {
1488 // Get the address for the string value variable.
1489 sz str_addr = chunk->reg_idx++;
1490 emit_op(OP_LDSTR, str_addr, res_name.idx, 0, node->var.val, chunk);
1491
1492 // Copy the fat pointer.
1493 emit_fat_copy(chunk, node, res_name.idx, str_addr);
1494 } break;
1332 default: { 1495 default: {
1333 emit_compile_err(compiler, chunk, node); 1496 emit_compile_err(compiler, chunk, node);
1334 return (CompResult){.type = COMP_ERR}; 1497 return (CompResult){.type = COMP_ERR};
@@ -1338,98 +1501,147 @@ compile_set(Compiler *compiler, Chunk *chunk, Node *node) {
1338} 1501}
1339 1502
1340CompResult 1503CompResult
1504compile_dot(Compiler *compiler, Chunk *chunk, Node *node) {
1505 CompResult left = compile_expr(compiler, chunk, node->binary.left);
1506
1507 // Need to pass this to the child function for them to properly increment
1508 // the address.
1509 compiler->has_addr = true;
1510 compiler->reg_addr = left.idx;
1511
1512 // Pointers can be accessed directly with dot syntax, no need for a->b.
1513 if (str_has_prefix(node->binary.left->type, cstr("@"))) {
1514 compiler->reg_addr = chunk->reg_idx++;
1515 emit_op(OP_LD64K, compiler->reg_addr, left.idx, 0, node, chunk);
1516 }
1517
1518 CompResult right = compile_expr(compiler, chunk, node->binary.right);
1519 compiler->has_addr = false;
1520 return (CompResult){.type = COMP_REG, .idx = right.idx};
1521}
1522
1523CompResult
1341compile_symbol(Compiler *compiler, Chunk *chunk, Node *node) { 1524compile_symbol(Compiler *compiler, Chunk *chunk, Node *node) {
1342 Str name = node->unique_name; 1525 Str name = node->unique_name;
1343 StrVarMap *map = NULL; 1526 Str type_name = node->type;
1344 Chunk *next = chunk; 1527 SymbolMap *map = symmap_lookup(&compiler->symbols, name);
1345 while (next) { 1528
1346 map = varmap_lookup(&next->varmap, name); 1529 // For a struct field, the base address must be passed via the
1347 if (map) { 1530 // compiler->reg_addr. This will calculate the offset and return and
1348 break; 1531 // address or the full value.
1532 if (map->val.kind == SYM_STRUCT_FIELD) {
1533 sz size = map->val.t.element_size;
1534 sz off = add_constant(chunk, map->val.t.offset);
1535 sz reg_addr = chunk->reg_idx++;
1536 emit_op(OP_ADDI, reg_addr, compiler->reg_addr, off, node, chunk);
1537 if (node->lvalue) {
1538 return (CompResult){.type = COMP_CONST, .idx = reg_addr};
1349 } 1539 }
1350 next = next->parent; 1540 sz reg_dst = chunk->reg_idx++;
1541 emit_sized_op(size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K, reg_dst,
1542 reg_addr, 0, node, chunk);
1543 return (CompResult){.type = COMP_REG, .idx = reg_dst};
1351 } 1544 }
1352 if (!map) { 1545
1353 emit_compile_err(compiler, chunk, node); 1546 if (map->val.kind == SYM_FUN) {
1354 return (CompResult){.type = COMP_ERR}; 1547 sz reg_dst = chunk->reg_idx++;
1548 sz fun_idx = map->val.fun->index;
1549 emit_op(OP_LDFUNPTR, reg_dst, fun_idx, 0, node, chunk);
1550 return (CompResult){.type = COMP_REG, .idx = reg_dst};
1355 } 1551 }
1356 sz op_ldaddr = OP_LDLADDR; 1552
1357 sz op_ldvar = OP_LDLVAR; 1553 // If we are here, symbol is a variable.
1358 if (next == &compiler->main_chunk) { 1554 sz size = map->val.var.type.size;
1359 op_ldaddr = OP_LDGADDR; 1555 sz idx = map->val.var.idx;
1360 op_ldvar = OP_LDGVAR; 1556 sz op_ldaddr = map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR;
1557 sz reg_addr = chunk->reg_idx++;
1558 sz reg_dst = reg_addr;
1559 emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk);
1560
1561 // Indexing a pointer to static array is allowed.
1562 if (node->parent && node->parent->kind == NODE_INDEX) {
1563 if (str_has_prefix(type_name, cstr("@["))) {
1564 emit_op(OP_LD64K, reg_addr, reg_addr, 0, node, chunk);
1565 }
1566 size = map->val.var.type.element_size;
1567 }
1568
1569 SymbolMap *tmap = symmap_lookup(&compiler->symbols, type_name);
1570 if (compiler->has_addr) {
1571 emit_op(OP_ADD, reg_addr, reg_addr, compiler->reg_addr, node, chunk);
1361 } 1572 }
1362 Variable var = map->val; 1573 // TODO: move these `str_` checks into the type system and check for
1363 u8 reg_dst = chunk->reg_idx++; 1574 // tmap->val.kind or size? Likewise, strings should be treated as a []U8
1364 if (node->is_ptr || str_has_prefix(var.type, cstr("[]")) || 1575 // slice, which is what they are after all.
1365 str_eq(var.type, cstr("str"))) { 1576 if (node->lvalue || str_eq(type_name, cstr("Str")) ||
1366 emit_op(op_ldaddr, reg_dst, var.idx, 0, node, chunk); 1577 tmap->val.kind == SYM_STRUCT) {
1367 } else { 1578 } else {
1368 emit_op(op_ldvar, reg_dst, var.idx, 0, node, chunk); 1579 emit_sized_op(size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K, reg_dst,
1580 reg_addr, 0, node, chunk);
1581 // TODO: if variable is too big and we want to write it, put it on the
1582 // stack.
1369 } 1583 }
1370 return (CompResult){.type = COMP_REG, .idx = reg_dst}; 1584 return (CompResult){.type = COMP_REG, .idx = reg_dst};
1371} 1585}
1372 1586
1373CompResult 1587CompResult
1374compile_symbol_idx(Compiler *compiler, Chunk *chunk, Node *node) { 1588compile_index(Compiler *compiler, Chunk *chunk, Node *node) {
1375 Str name = node->unique_name; 1589 // Load a register with the zero offset.
1376 StrVarMap *map = NULL; 1590 sz reg_offset = 0;
1377 Chunk *next = chunk; 1591 if (compiler->has_addr) {
1378 while (next) { 1592 reg_offset = compiler->reg_addr;
1379 map = varmap_lookup(&next->varmap, name);
1380 if (map) {
1381 break;
1382 }
1383 next = next->parent;
1384 }
1385 if (!map) {
1386 eprintln("couldn't resolve symbol name: %s", name);
1387 emit_compile_err(compiler, chunk, node);
1388 return (CompResult){.type = COMP_ERR};
1389 }
1390 sz op_ldaddr = OP_LDLADDR;
1391 sz op_ldvar = OP_LDLVAR;
1392 if (next == &compiler->main_chunk) {
1393 op_ldaddr = OP_LDGADDR;
1394 op_ldvar = OP_LDGVAR;
1395 }
1396
1397 // Destination.
1398 u8 reg_dst = chunk->reg_idx++;
1399
1400 // Address.
1401 sz reg_addr = chunk->reg_idx++;
1402 if (str_has_prefix(map->val.type, cstr("[]"))) {
1403 emit_op(op_ldaddr, reg_addr, map->val.idx, 0, node->var.val, chunk);
1404 } else { 1593 } else {
1405 emit_op(op_ldvar, reg_addr, map->val.idx, 0, node->var.val, chunk); 1594 reg_offset = chunk->reg_idx++;
1595 sz base_addr = add_constant(chunk, 0);
1596 emit_op(OP_LDCONST, reg_offset, base_addr, 0, node, chunk);
1406 } 1597 }
1407 1598 Node *next = node;
1408 // Index. 1599 while (next && next->t.next) {
1409 CompResult idx = compile_expr(compiler, chunk, node->sym.arr_size); 1600 Str type_name = next->type;
1410 switch (idx.type) { 1601 SymbolMap *map = symmap_lookup(&compiler->symbols, type_name);
1411 case COMP_CONST: { 1602 sz type_size = map->val.t.size;
1412 emit_op(OP_LD64I, reg_dst, reg_addr, idx.idx, node, chunk); 1603 sz size = add_constant(chunk, type_size);
1413 } break; 1604
1414 case COMP_REG: { 1605 sz reg_mul = chunk->reg_idx++;
1415 emit_op(OP_LD64, reg_dst, reg_addr, idx.idx, node, chunk); 1606 CompResult res = compile_expr(compiler, chunk, next->idx.value);
1416 } break; 1607 switch (res.type) {
1417 default: { 1608 case COMP_CONST: {
1418 emit_compile_err(compiler, chunk, node); 1609 sz reg_idx = chunk->reg_idx++;
1419 return (CompResult){.type = COMP_ERR}; 1610 emit_op(OP_LDCONST, reg_idx, res.idx, 0, node, chunk);
1420 } break; 1611 emit_op(OP_MULI, reg_mul, reg_idx, size, node, chunk);
1612 } break;
1613 case COMP_REG: {
1614 emit_op(OP_MULI, reg_mul, res.idx, size, node, chunk);
1615 } break;
1616 default: {
1617 emit_compile_err(compiler, chunk, node);
1618 return (CompResult){.type = COMP_ERR};
1619 } break;
1620 }
1621 emit_op(OP_ADD, reg_offset, reg_offset, reg_mul, node, chunk);
1622 next = next->t.next;
1623 if (next->t.next) {
1624 // Ensure we are maintaining single assignment policy.
1625 sz tmp = chunk->reg_idx++;
1626 emit_op(OP_MOV64, tmp, reg_offset, reg_mul, node, chunk);
1627 reg_offset = tmp;
1628 }
1421 } 1629 }
1422 // TODO: hardcoding the type size for now (LD64/LD64I). 1630 compiler->reg_addr = reg_offset;
1423 return (CompResult){.type = COMP_REG, .idx = reg_dst}; 1631 compiler->has_addr = true;
1632 CompResult res = compile_expr(compiler, chunk, next);
1633 compiler->has_addr = false;
1634 return res;
1424} 1635}
1425 1636
1426CompResult 1637CompResult
1427compile_expr(Compiler *compiler, Chunk *chunk, Node *node) { 1638compile_expr(Compiler *compiler, Chunk *chunk, Node *node) {
1428 switch (node->kind) { 1639 switch (node->kind) {
1640 case NODE_DOT: return compile_dot(compiler, chunk, node);
1429 case NODE_BREAK: return compile_break(compiler, chunk, node); 1641 case NODE_BREAK: return compile_break(compiler, chunk, node);
1430 case NODE_CONTINUE: return compile_continue(compiler, chunk, node); 1642 case NODE_CONTINUE: return compile_continue(compiler, chunk, node);
1431 case NODE_RETURN: return compile_return(compiler, chunk, node); 1643 case NODE_RETURN: return compile_return(compiler, chunk, node);
1432 case NODE_FUN: return compile_function(compiler, chunk, node); 1644 case NODE_FUN: return (CompResult){.type = COMP_NIL};
1433 case NODE_FUNCALL: return compile_funcall(compiler, chunk, node); 1645 case NODE_FUNCALL: return compile_funcall(compiler, chunk, node);
1434 case NODE_WHILE: return compile_while(compiler, chunk, node); 1646 case NODE_WHILE: return compile_while(compiler, chunk, node);
1435 case NODE_IF: return compile_if(compiler, chunk, node); 1647 case NODE_IF: return compile_if(compiler, chunk, node);
@@ -1438,7 +1650,7 @@ compile_expr(Compiler *compiler, Chunk *chunk, Node *node) {
1438 case NODE_BITNOT: 1650 case NODE_BITNOT:
1439 case NODE_NOT: return compile_unary(compiler, chunk, node); break; 1651 case NODE_NOT: return compile_unary(compiler, chunk, node); break;
1440 case NODE_AND: 1652 case NODE_AND:
1441 case NODE_OR: 1653 case NODE_OR: return compile_binary_logic(compiler, chunk, node); break;
1442 case NODE_EQ: 1654 case NODE_EQ:
1443 case NODE_NEQ: 1655 case NODE_NEQ:
1444 case NODE_LT: 1656 case NODE_LT:
@@ -1480,15 +1692,56 @@ compile_expr(Compiler *compiler, Chunk *chunk, Node *node) {
1480 case NODE_LET: return compile_let(compiler, chunk, node); 1692 case NODE_LET: return compile_let(compiler, chunk, node);
1481 case NODE_SET: return compile_set(compiler, chunk, node); 1693 case NODE_SET: return compile_set(compiler, chunk, node);
1482 case NODE_SYMBOL: return compile_symbol(compiler, chunk, node); 1694 case NODE_SYMBOL: return compile_symbol(compiler, chunk, node);
1483 case NODE_SYMBOL_IDX: return compile_symbol_idx(compiler, chunk, node); 1695 case NODE_PTR: {
1696 Str name = node->t.next->unique_name;
1697 SymbolMap *map = symmap_lookup(&compiler->symbols, name);
1698 sz idx = map->val.var.idx;
1699 sz op_ldaddr =
1700 map->val.kind == SYM_GLOBALVAR ? OP_LDGADDR : OP_LDLADDR;
1701
1702 sz reg_addr = chunk->reg_idx++;
1703 emit_op(op_ldaddr, reg_addr, idx, 0, node, chunk);
1704 return (CompResult){.type = COMP_REG, .idx = reg_addr};
1705 } break;
1706 case NODE_DEREF: {
1707 Str type_name = node->type;
1708 SymbolMap *map = symmap_lookup(&compiler->symbols, type_name);
1709 sz type_size = map->val.t.element_size;
1710 Node *next = node->deref.next;
1711 CompResult res = compile_symbol(compiler, chunk, node);
1712 sz reg_dst = res.idx;
1713 sz reg_src = res.idx;
1714 while (next) {
1715 reg_dst = chunk->reg_idx++;
1716 if (next->kind == NODE_SYMBOL) {
1717 break;
1718 }
1719 emit_op(OP_LD64K, reg_dst, reg_src, 0, node, chunk);
1720 reg_src = reg_dst;
1721 next = next->deref.next;
1722 }
1723 if (!node->lvalue) {
1724 emit_sized_op(type_size, OP_LD64K, OP_LD32K, OP_LD16K, OP_LD8K,
1725 reg_dst, reg_src, 0, node, chunk);
1726 } else {
1727 emit_op(OP_LD64K, reg_dst, reg_src, 0, node, chunk);
1728 }
1729 return (CompResult){.type = COMP_REG, .idx = reg_dst};
1730 } break;
1731 case NODE_INDEX: return compile_index(compiler, chunk, node);
1484 case NODE_BLOCK: { 1732 case NODE_BLOCK: {
1485 CompResult res; 1733 CompResult res;
1486 for (sz i = 0; i < array_size(node->elements); i++) { 1734 for (sz i = 0; i < array_size(node->elements); i++) {
1487 Node *root = node->elements[i]; 1735 Node *root = node->elements[i];
1488 res = compile_expr(compiler, chunk, root); 1736 res = compile_expr(compiler, chunk, root);
1737 if (root->kind == NODE_BREAK || root->kind == NODE_CONTINUE ||
1738 root->kind == NODE_RETURN) {
1739 break;
1740 }
1489 } 1741 }
1490 return res; 1742 return res;
1491 } break; 1743 } break;
1744 case NODE_STRUCT:
1492 case NODE_NIL: return (CompResult){.type = COMP_NIL}; 1745 case NODE_NIL: return (CompResult){.type = COMP_NIL};
1493 default: { 1746 default: {
1494 eprintln("error: compilation not implemented for node %s", 1747 eprintln("error: compilation not implemented for node %s",
@@ -1501,30 +1754,48 @@ compile_expr(Compiler *compiler, Chunk *chunk, Node *node) {
1501} 1754}
1502 1755
1503void 1756void
1504disassemble_instruction(Instruction instruction) { 1757disassemble_instruction(Chunk chunk, Instruction instruction) {
1758 print("%s", op_str[instruction.op]);
1759 for (sz i = 0; i < 11 - op_str[instruction.op].size; i++) {
1760 print(" ");
1761 }
1505 switch (instruction.op) { 1762 switch (instruction.op) {
1763 case OP_RECUR:
1506 case OP_CALL: 1764 case OP_CALL:
1507 println("%s f%d", op_str[instruction.op], instruction.dst, 1765 println("f%d", instruction.dst, instruction.a, instruction.b);
1508 instruction.a, instruction.b); 1766 break;
1767 case OP_LDFUNPTR:
1768 println("r%d, f%d", instruction.dst, instruction.a, instruction.b);
1509 break; 1769 break;
1510 case OP_MOV8: 1770 case OP_MOV8:
1511 case OP_MOV16: 1771 case OP_MOV16:
1512 case OP_MOV32: 1772 case OP_MOV32:
1513 case OP_MOV64: 1773 case OP_MOV64:
1514 println("%s r%d, r%d", op_str[instruction.op], instruction.dst,
1515 instruction.a, instruction.b);
1516 break;
1517 case OP_JMPF:
1518 case OP_JMPT:
1519 println("%s l%d, r%d", op_str[instruction.op], instruction.dst,
1520 instruction.a, instruction.b);
1521 break;
1522 case OP_LD8K: 1774 case OP_LD8K:
1523 case OP_LD16K: 1775 case OP_LD16K:
1524 case OP_LD32K: 1776 case OP_LD32K:
1525 case OP_LD64K: 1777 case OP_LD64K:
1526 println("%s r%d, c%d", op_str[instruction.op], instruction.dst, 1778 case OP_ST8K:
1527 instruction.a, instruction.b); 1779 case OP_ST16K:
1780 case OP_ST32K:
1781 case OP_ST64K:
1782 println("r%d, r%d", instruction.dst, instruction.a, instruction.b);
1783 break;
1784 case OP_JMPF:
1785 case OP_JMPT:
1786 println("l%d, r%d", instruction.dst, instruction.a, instruction.b);
1787 break;
1788 case OP_MOV8I:
1789 case OP_MOV16I:
1790 case OP_MOV32I:
1791 case OP_MOV64I:
1792 case OP_LDCONST:
1793 if (array_size(chunk.constants) != 0) {
1794 println("r%d, c%d (%x)", instruction.dst, instruction.a,
1795 chunk.constants[instruction.a].u);
1796 } else {
1797 println("r%d, c%d", instruction.dst, instruction.a);
1798 }
1528 break; 1799 break;
1529 case OP_LD8I: 1800 case OP_LD8I:
1530 case OP_LD16I: 1801 case OP_LD16I:
@@ -1557,8 +1828,14 @@ disassemble_instruction(Instruction instruction) {
1557 case OP_BITANDI: 1828 case OP_BITANDI:
1558 case OP_BITORI: 1829 case OP_BITORI:
1559 case OP_BITXORI: 1830 case OP_BITXORI:
1560 println("%s r%d, r%d, c%d", op_str[instruction.op], instruction.dst, 1831 case OP_MEMCPYI:
1561 instruction.a, instruction.b); 1832 if (array_size(chunk.constants) != 0) {
1833 println("r%d, r%d, c%d (%x)", instruction.dst, instruction.a,
1834 instruction.b, chunk.constants[instruction.b].u);
1835 } else {
1836 println("r%d, r%d, c%d", instruction.dst, instruction.a,
1837 instruction.b);
1838 }
1562 break; 1839 break;
1563 case OP_LD8: 1840 case OP_LD8:
1564 case OP_LD16: 1841 case OP_LD16:
@@ -1591,84 +1868,112 @@ disassemble_instruction(Instruction instruction) {
1591 case OP_BITAND: 1868 case OP_BITAND:
1592 case OP_BITOR: 1869 case OP_BITOR:
1593 case OP_BITXOR: 1870 case OP_BITXOR:
1594 println("%s r%d, r%d, r%d", op_str[instruction.op], instruction.dst, 1871 case OP_MEMCPY:
1595 instruction.a, instruction.b); 1872 println("r%d, r%d, r%d", instruction.dst, instruction.a,
1873 instruction.b);
1596 break; 1874 break;
1597 case OP_LDGVAR: 1875 case OP_LDGVAR:
1598 case OP_LDGADDR: 1876 case OP_LDGADDR:
1599 case OP_LDLVAR: 1877 case OP_LDLVAR:
1600 case OP_LDLADDR: 1878 case OP_LDLADDR:
1601 println("%s r%d, v%d", op_str[instruction.op], instruction.dst, 1879 println("r%d, v%d", instruction.dst, instruction.a, instruction.b);
1602 instruction.a, instruction.b);
1603 break; 1880 break;
1604 case OP_LDSTR: 1881 case OP_LDSTR:
1605 println("%s r%d, s%d", op_str[instruction.op], instruction.dst, 1882 println("r%d, s%d", instruction.dst, instruction.a, instruction.b);
1606 instruction.a, instruction.b);
1607 break; 1883 break;
1608 case OP_STGVAR: 1884 case OP_STGVAR:
1609 case OP_STLVAR: 1885 case OP_STLVAR:
1610 println("%s v%d, r%d", op_str[instruction.op], instruction.dst, 1886 println("v%d, r%d", instruction.dst, instruction.a, instruction.b);
1611 instruction.a, instruction.b);
1612 break; 1887 break;
1613 case OP_STGVARI: 1888 case OP_STGVARI:
1614 case OP_STLVARI: 1889 case OP_STLVARI:
1615 println("%s v%d, c%d", op_str[instruction.op], instruction.dst, 1890 if (array_size(chunk.constants) != 0) {
1616 instruction.a, instruction.b); 1891 println("v%d, c%d (%x)", instruction.dst, instruction.a,
1892 chunk.constants[instruction.a].u);
1893 } else {
1894 println("v%d, c%d", instruction.dst, instruction.a);
1895 }
1617 break; 1896 break;
1618 case OP_BITNOTI: 1897 case OP_BITNOTI:
1619 case OP_NOTI: 1898 case OP_NOTI:
1620 println("%s r%d, c%d", op_str[instruction.op], instruction.dst, 1899 if (array_size(chunk.constants) != 0) {
1621 instruction.a, instruction.b); 1900 println("r%d, c%d (%x)", instruction.dst, instruction.a,
1901 chunk.constants[instruction.a].u);
1902 } else {
1903 println("r%d, c%d", instruction.dst, instruction.a);
1904 }
1622 break; 1905 break;
1623 case OP_BITNOT: 1906 case OP_BITNOT:
1624 case OP_NOT: 1907 case OP_NOT:
1625 println("%s r%d, r%d", op_str[instruction.op], instruction.dst, 1908 println("r%d, r%d", instruction.dst, instruction.a, instruction.b);
1626 instruction.a, instruction.b);
1627 break; 1909 break;
1628 case OP_JMP: 1910 case OP_JMP:
1629 println("%s l%d", op_str[instruction.op], instruction.dst, 1911 println("l%d", instruction.dst, instruction.a, instruction.b);
1630 instruction.a, instruction.b);
1631 break; 1912 break;
1632 case OP_JMPFI: 1913 case OP_JMPFI:
1633 case OP_JMPTI: 1914 case OP_JMPTI:
1634 println("%s l%d, c%d", op_str[instruction.op], instruction.dst, 1915 if (array_size(chunk.constants) != 0) {
1635 instruction.a, instruction.b); 1916 println("l%d, c%d (%x)", instruction.dst, instruction.a,
1917 chunk.constants[instruction.a].u);
1918 } else {
1919 println("l%d, c%d", instruction.dst, instruction.a);
1920 }
1636 break; 1921 break;
1922 case OP_PRINTS8:
1923 case OP_PRINTS16:
1924 case OP_PRINTS32:
1637 case OP_PRINTS64: 1925 case OP_PRINTS64:
1926 case OP_PRINTU8:
1927 case OP_PRINTU16:
1928 case OP_PRINTU32:
1929 case OP_PRINTU64:
1930 case OP_PRINTF32:
1638 case OP_PRINTF64: 1931 case OP_PRINTF64:
1639 case OP_PRINTSTR: 1932 case OP_PRINTSTR:
1640 case OP_PRINTBOOL: 1933 case OP_PRINTBOOL:
1641 case OP_PUSH: 1934 case OP_PUSH:
1642 case OP_POP: 1935 case OP_POP:
1643 case OP_PUTRET: 1936 case OP_PUTRET:
1644 println("%s r%d", op_str[instruction.op], instruction.dst, 1937 println("r%d", instruction.dst, instruction.a, instruction.b);
1645 instruction.a, instruction.b);
1646 break; 1938 break;
1647 case OP_PRINTSTRI: 1939 case OP_PRINTSTRI:
1940 case OP_PRINTS8I:
1941 case OP_PRINTS16I:
1942 case OP_PRINTS32I:
1648 case OP_PRINTS64I: 1943 case OP_PRINTS64I:
1944 case OP_PRINTU8I:
1945 case OP_PRINTU16I:
1946 case OP_PRINTU32I:
1947 case OP_PRINTU64I:
1948 case OP_PRINTF32I:
1649 case OP_PRINTF64I: 1949 case OP_PRINTF64I:
1650 case OP_PRINTBOOLI: 1950 case OP_PRINTBOOLI:
1651 case OP_RESERVE: 1951 case OP_RESERVE:
1652 case OP_PUSHI: 1952 case OP_PUSHI:
1653 case OP_PUTRETI: 1953 case OP_PUTRETI:
1654 println("%s c%d", op_str[instruction.op], instruction.dst, 1954 if (array_size(chunk.constants) != 0) {
1655 instruction.a, instruction.b); 1955 println("c%d (%x)", instruction.dst,
1956 chunk.constants[instruction.dst].u);
1957 } else {
1958 println("c%d", instruction.dst);
1959 }
1656 break; 1960 break;
1657 case OP_RET: 1961 case OP_RET:
1658 case OP_HALT: println("%s", op_str[instruction.op]); break; 1962 case OP_RECUR_SELF:
1963 case OP_HALT: println(""); break;
1659 default: println("Unknown opcode %d", instruction.op); break; 1964 default: println("Unknown opcode %d", instruction.op); break;
1660 } 1965 }
1661} 1966}
1662 1967
1663void 1968void
1664disassemble_chunk(Chunk chunk) { 1969disassemble_chunk(Chunk chunk) {
1970 println("═════════════════════════════════════════════════════");
1665 println("CHUNK %d: %s%s", chunk.id, chunk.file_name, chunk.name); 1971 println("CHUNK %d: %s%s", chunk.id, chunk.file_name, chunk.name);
1666 println("n_regs: %d, n_vars: %d, n_strings: %d, n_consts: %d", 1972 println("n_regs: %d, n_vars: %d, n_strings: %d, n_consts: %d",
1667 chunk.reg_idx, array_size(chunk.vars), chunk.str_idx, 1973 chunk.reg_idx, chunk.fun.n_vars, chunk.str_idx, chunk.const_idx);
1668 chunk.const_idx); 1974 println("═══════════════════════ code ════════════════════════");
1669 println("================== code =================="); 1975 println(" LINE:COL INUM LABELS OP OPERANDS");
1670 println(" LINE:COL INUM LABELS OP OPERANDS "); 1976 println("─────────────────────────────────────────────────────");
1671 println("------------------------------------------");
1672 for (sz i = 0; i < array_size(chunk.code); i++) { 1977 for (sz i = 0; i < array_size(chunk.code); i++) {
1673 printf(" %.4ld:%.4ld %.4lx ", chunk.linecol[i].line, 1978 printf(" %.4ld:%.4ld %.4lx ", chunk.linecol[i].line,
1674 chunk.linecol[i].col, i); 1979 chunk.linecol[i].col, i);
@@ -1692,90 +1997,106 @@ disassemble_chunk(Chunk chunk) {
1692 } else { 1997 } else {
1693 printf(" "); 1998 printf(" ");
1694 } 1999 }
1695 disassemble_instruction(chunk.code[i]); 2000 disassemble_instruction(chunk, chunk.code[i]);
1696 } 2001 }
1697 if (array_size(chunk.constants) > 0) { 2002 if (array_size(chunk.constants) > 0) {
1698 println("================ constants ===============", chunk.file_name); 2003 println("═════════════════════ constants ═════════════════════");
1699 for (sz i = 0; i < array_size(chunk.constants); i++) { 2004 for (sz i = 0; i < array_size(chunk.constants); i++) {
1700 println(" %x{2}: %x{8}", i, chunk.constants[i]); 2005 println(" %x{2}: %x{8}", i, chunk.constants[i]);
1701 } 2006 }
1702 } 2007 }
1703 if (array_size(chunk.strings) > 0) { 2008 if (array_size(chunk.strings) > 0) {
1704 println("================= strings ================", chunk.file_name); 2009 println("══════════════════════ strings ══════════════════════");
1705 for (sz i = 0; i < array_size(chunk.strings); i++) { 2010 for (sz i = 0; i < array_size(chunk.strings); i++) {
1706 println(" %x{2}: %s", i, chunk.strings[i]); 2011 println(" %x{2}: %s", i, chunk.strings[i]);
1707 } 2012 }
1708 } 2013 }
1709 if (array_size(chunk.vars) > 0) { 2014 if (chunk.fun.n_vars > 0) {
1710 println("================ variables ===============", chunk.file_name); 2015 println("═════════════════════ variables ═════════════════════");
1711 for (sz i = 0; i < array_size(chunk.vars); i++) { 2016 Variable *vars = chunk.fun.vars;
1712 println(" %x{2}: [%x{4}:%x{4}] %s: %s", i, chunk.vars[i].offset, 2017 for (sz i = 0; i < chunk.fun.n_vars; i++) {
1713 chunk.vars[i].offset + chunk.vars[i].size, 2018 println(" %x{2}: [%x{4}:%x{4}] %s: %s", i, vars[i].offset,
1714 chunk.vars[i].name, chunk.vars[i].type); 2019 vars[i].offset + vars[i].size, vars[i].name,
1715 } 2020 vars[i].type.id);
1716 }
1717 if (array_size(chunk.functions) > 0) {
1718 println("================ functions ===============", chunk.file_name);
1719 for (sz i = 0; i < array_size(chunk.functions); i++) {
1720 Chunk *func = chunk.functions[i];
1721 println(" %x{2}: func%d: %s", i, func->id, func->name);
1722 } 2021 }
1723 } 2022 }
1724 println("==========================================");
1725 for (sz i = 0; i < array_size(chunk.functions); i++) {
1726 Chunk *func = chunk.functions[i];
1727 disassemble_chunk(*func);
1728 }
1729} 2023}
1730 2024
1731void 2025void
1732bytecode_compiler(Compiler *compiler, Parser parser) { 2026bytecode_compiler(Compiler *compiler, Analyzer analyzer) {
1733 compiler->main_chunk = (Chunk){ 2027 // Load quicklook tables..
1734 .file_name = compiler->file_name, 2028 compiler->numeric_types = analyzer.numeric_types;
1735 .storage = compiler->storage, 2029 compiler->integer_types = analyzer.integer_types;
1736 .name = cstr(".main"), 2030 compiler->uint_types = analyzer.uint_types;
1737 }; 2031 compiler->sint_types = analyzer.sint_types;
1738 // TODO: Fill up builtin types and tables. 2032 compiler->float_types = analyzer.float_types;
1739 array_zero(compiler->main_chunk.constants, 256, compiler->storage); 2033
1740 array_zero(compiler->main_chunk.code, 0xffff, compiler->storage); 2034 // Flatten the symbol table into a single map.
1741 sz n_roots = array_size(parser.nodes); 2035 for (sz i = 0; i < array_size(analyzer.scopes); i++) {
1742 CompResult res = {0}; 2036 Arena scratch = *analyzer.storage;
1743 2037 Scope *scope = analyzer.scopes[i];
1744 // Do a first pass to setup the function declarations on the main scope. 2038 SymbolMapIter iter = symmap_iterator(scope->symbols, &scratch);
1745 Chunk *chunk = &compiler->main_chunk; 2039 SymbolMap *m = symmap_next(&iter, &scratch);
1746 for (sz i = 0; i < n_roots; i++) { 2040 while (m) {
1747 Node *root = parser.nodes[i]; 2041 Symbol sym = m->val;
1748 if (root->kind == NODE_FUN) { 2042 symmap_insert(&compiler->symbols, sym.id, sym, compiler->storage);
1749 declare_function(chunk, root); 2043 m = symmap_next(&iter, &scratch);
1750 } 2044 }
1751 } 2045 }
1752 2046
1753 // Compile all root expressions. 2047 // Initialize chunks for all functions.
1754 for (sz i = 0; i < n_roots; i++) { 2048 array_zero(compiler->chunks, analyzer.n_funcs, compiler->storage);
1755 Node *root = parser.nodes[i]; 2049 array_head(compiler->chunks)->size = analyzer.n_funcs;
1756 res = compile_expr(compiler, chunk, root); 2050 FunMapIter iter = funmap_iterator(analyzer.fun_map, compiler->storage);
2051 FunMap *m = funmap_next(&iter, compiler->storage);
2052 while (m) {
2053 Function fun = m->val;
2054 compiler->chunks[fun.index] = chunk_alloc(compiler);
2055 compiler->chunks[fun.index]->id = fun.index;
2056 compiler->chunks[fun.index]->fun = fun;
2057 compiler->chunks[fun.index]->name = fun.id;
2058 m = funmap_next(&iter, compiler->storage);
1757 } 2059 }
1758 2060
1759 // Make sure the last result is on r0. 2061 // Compile all the chunks.
1760 sz res_reg = 0; 2062 for (sz i = 0; i < analyzer.n_funcs; i++) {
1761 bool is_nil = false; 2063 Chunk *chunk = compiler->chunks[i];
1762 switch (res.type) { 2064 CompResult res = compile_expr(compiler, chunk, chunk->fun.body);
1763 case COMP_CONST: { 2065 if (i == 0) {
1764 res_reg = chunk->reg_idx++; 2066 // Make sure the last result is on r0.
1765 Instruction inst = 2067 sz res_reg = 0;
1766 (Instruction){.op = OP_LD64K, .dst = res_reg, .a = res.idx}; 2068 bool is_nil = false;
1767 array_push(chunk->code, inst, chunk->storage); 2069 switch (res.type) {
1768 } break; 2070 case COMP_CONST: {
1769 case COMP_REG: { 2071 res_reg = chunk->reg_idx++;
1770 res_reg = res.idx; 2072 Instruction inst = (Instruction){
1771 } break; 2073 .op = OP_LDCONST, .dst = res_reg, .a = res.idx};
1772 case COMP_NIL: { 2074 array_push(chunk->code, inst, chunk->storage);
1773 is_nil = true; 2075 } break;
1774 } break; 2076 case COMP_REG: {
1775 default: break; 2077 res_reg = res.idx;
2078 } break;
2079 case COMP_NIL: {
2080 is_nil = true;
2081 } break;
2082 default: break;
2083 }
2084 emit_op(OP_HALT, res_reg, !is_nil, 0, NULL, chunk);
2085 } else {
2086 // Put return values into memory.
2087 switch (res.type) {
2088 case COMP_CONST: {
2089 emit_op(OP_PUTRETI, res.idx, 0, 0, chunk->fun.body, chunk);
2090 } break;
2091 case COMP_REG: {
2092 emit_op(OP_PUTRET, res.idx, 0, 0, chunk->fun.body, chunk);
2093 } break;
2094 default: break;
2095 }
2096 emit_op(OP_RET, 0, 0, 0, chunk->fun.body, chunk);
2097 }
2098 verify_chunk(chunk);
1776 } 2099 }
1777 emit_op(OP_HALT, res_reg, !is_nil, 0, NULL, chunk);
1778 verify_chunk(chunk);
1779} 2100}
1780 2101
1781#endif // COMPILER_C 2102#endif // COMPILER_C
diff --git a/src/lexer.c b/src/lexer.c
index 22d7edc..a5b841b 100644
--- a/src/lexer.c
+++ b/src/lexer.c
@@ -57,8 +57,8 @@ typedef enum TokenKind {
57 57
58 // Logical ops. 58 // Logical ops.
59 TOK_NOT, // ! 59 TOK_NOT, // !
60 TOK_AND, // && 60 TOK_AND, // and
61 TOK_OR, // || 61 TOK_OR, // or
62 TOK_EQ, // == 62 TOK_EQ, // ==
63 TOK_NEQ, // != 63 TOK_NEQ, // !=
64 TOK_LT, // < 64 TOK_LT, // <
@@ -84,6 +84,7 @@ typedef enum TokenKind {
84 TOK_DOT, // . 84 TOK_DOT, // .
85 TOK_AT, // @ 85 TOK_AT, // @
86 TOK_ASSIGN, // = 86 TOK_ASSIGN, // =
87 TOK_ARROW, // ->
87 88
88 // End of file. 89 // End of file.
89 TOK_EOF, 90 TOK_EOF,
@@ -168,6 +169,7 @@ Str token_str[] = {
168 [TOK_DOT] = cstr("DOT"), 169 [TOK_DOT] = cstr("DOT"),
169 [TOK_AT] = cstr("AT"), 170 [TOK_AT] = cstr("AT"),
170 [TOK_ASSIGN] = cstr("ASSIGN"), 171 [TOK_ASSIGN] = cstr("ASSIGN"),
172 [TOK_ARROW] = cstr("ARROW"),
171 173
172 // End of file. 174 // End of file.
173 [TOK_EOF] = cstr("EOF"), 175 [TOK_EOF] = cstr("EOF"),
@@ -453,7 +455,7 @@ scan_token(Scanner *scanner) {
453 case '+': { 455 case '+': {
454 char p = scan_peek(scanner); 456 char p = scan_peek(scanner);
455 if (p >= '0' && p <= '9') { 457 if (p >= '0' && p <= '9') {
456 scan_next(scanner); 458 *scanner = current;
457 return emit_token_number(scanner); 459 return emit_token_number(scanner);
458 } 460 }
459 if (p == '=') { 461 if (p == '=') {
@@ -465,13 +467,17 @@ scan_token(Scanner *scanner) {
465 case '-': { 467 case '-': {
466 char p = scan_peek(scanner); 468 char p = scan_peek(scanner);
467 if (p >= '0' && p <= '9') { 469 if (p >= '0' && p <= '9') {
468 scan_next(scanner); 470 *scanner = current;
469 return emit_token_number(scanner); 471 return emit_token_number(scanner);
470 } 472 }
471 if (p == '=') { 473 if (p == '=') {
472 scan_next(scanner); 474 scan_next(scanner);
473 return emit_token(current, scanner, TOK_SUB_ASSIGN); 475 return emit_token(current, scanner, TOK_SUB_ASSIGN);
474 } 476 }
477 if (p == '>') {
478 scan_next(scanner);
479 return emit_token(current, scanner, TOK_ARROW);
480 }
475 return emit_token(current, scanner, TOK_SUB); 481 return emit_token(current, scanner, TOK_SUB);
476 }; 482 };
477 case '*': { 483 case '*': {
@@ -550,24 +556,14 @@ scan_token(Scanner *scanner) {
550 return emit_token(current, scanner, TOK_BITXOR); 556 return emit_token(current, scanner, TOK_BITXOR);
551 }; 557 };
552 case '&': { 558 case '&': {
553 char p = scan_peek(scanner); 559 if (scan_peek(scanner) == '=') {
554 if (p == '&') {
555 scan_next(scanner);
556 return emit_token(current, scanner, TOK_AND);
557 }
558 if (p == '=') {
559 scan_next(scanner); 560 scan_next(scanner);
560 return emit_token(current, scanner, TOK_BITOR_ASSIGN); 561 return emit_token(current, scanner, TOK_BITOR_ASSIGN);
561 } 562 }
562 return emit_token(current, scanner, TOK_BITAND); 563 return emit_token(current, scanner, TOK_BITAND);
563 }; 564 };
564 case '|': { 565 case '|': {
565 char p = scan_peek(scanner); 566 if (scan_peek(scanner) == '=') {
566 if (p == '|') {
567 scan_next(scanner);
568 return emit_token(current, scanner, TOK_OR);
569 }
570 if (p == '=') {
571 scan_next(scanner); 567 scan_next(scanner);
572 return emit_token(current, scanner, TOK_BITOR_ASSIGN); 568 return emit_token(current, scanner, TOK_BITOR_ASSIGN);
573 } 569 }
@@ -610,6 +606,11 @@ scan_token(Scanner *scanner) {
610 return emit_token_err(&current, cstr("unexpected character")); 606 return emit_token_err(&current, cstr("unexpected character"));
611 } 607 }
612 switch (val.mem[0]) { 608 switch (val.mem[0]) {
609 case 'a': {
610 if (str_eq(val, cstr("and"))) {
611 return emit_token(current, scanner, TOK_AND);
612 }
613 } break;
613 case 'b': { 614 case 'b': {
614 if (str_eq(val, cstr("break"))) { 615 if (str_eq(val, cstr("break"))) {
615 return emit_token(current, scanner, TOK_BREAK); 616 return emit_token(current, scanner, TOK_BREAK);
@@ -683,6 +684,11 @@ scan_token(Scanner *scanner) {
683 return emit_token(current, scanner, TOK_TRUE); 684 return emit_token(current, scanner, TOK_TRUE);
684 } 685 }
685 } break; 686 } break;
687 case 'o': {
688 if (str_eq(val, cstr("or"))) {
689 return emit_token(current, scanner, TOK_OR);
690 }
691 } break;
686 case 'w': { 692 case 'w': {
687 if (str_eq(val, cstr("while"))) { 693 if (str_eq(val, cstr("while"))) {
688 return emit_token(current, scanner, TOK_WHILE); 694 return emit_token(current, scanner, TOK_WHILE);
diff --git a/src/main.c b/src/main.c
index 65c05ff..b2e5e32 100644
--- a/src/main.c
+++ b/src/main.c
@@ -11,27 +11,103 @@
11 11
12// TODO: unions 12// TODO: unions
13// TODO: embed (binary file) and include (source file) 13// TODO: embed (binary file) and include (source file)
14// TODO: revisit ast parsing for pointers and arrays (I think I'm missing corner 14// NOTE: Syntax ideas.
15// cases). 15// []Int vs Int[], @sym vs sym@. On the right dereference, on the left
16// TODO: fix semantics for the following expression: 16// declaration or taking the address/reference of a variable. Similar
17// let b = int 17// to what Odin does.
18// a type shouldn't be a valid symbol name 18// let a: @Int pointer to int
19// TODO: consider making all tye types PascalCase: Int F64 Str... 19// let b: @@Int pointer to pointer to int (etc.)
20// TODO: add a `const` keyword that can only take literals or constexpr values. 20// let c: [123]Int static array -> @Int
21// let m: [32][32]Int multi-dimensional-arrays
22// let d: []Int slice / view -> struct Slice { u8* m ; sz s ; }
23// let e: [...]Int dynamic array -> struct DArry { u8* m ; sz s ; sz c ; }
24// let f: [123]@Int static array of int pointers [](@Int)
25// let g: @[123]Int pointer to a static array of integers @([123]Int)
26// let h: #[Str:Int] hash map of string keys and integer values
27// let i: #[Str:@Int] hash map of string keys and pointers to integer
28// let j: #[@Str:Int] hash map of string pointers to integers
29// let k: @#[Str:Int] pointer to a hash map of string to ints
30// let l: (Int Int -> Int) function pointer == @(fun(Int,Int):Int)
31// NOTE: Ideas for polymorphic types/functions:
32//
33// ; Use the :: operator to define generic types.
34// struct List :: T {
35// value: T
36// next: @List:T
37// }
38//
39// let x: List:Int ; Specialization, using a single colon.
40//
41// struct Pair :: T:K {
42// a: T
43// b: K
44// }
45//
46// let y: Pair:Int:Str ; In case of multiple types, use a colon for each.
47//
48// ; just before the return value we could specify the free types with the
49// ; :: operator, staying consistent with struct type declaration.
50// fun head(l: List:T) :: T -> T {
51// l.value
52// }
53//
54// fun get_first(x: Pair:T:K) :: T:K -> T {
55// x.a
56// }
57//
58// fun get_second(x: Pair:T:K) :: T:K -> K {
59// x.b
60// }
61//
62//
63// TODO: constexpr or const expressions could be evaluated with the bytecode
64// interpreter if we are performing compilation.
21// TODO: "first class functions" via function pointers 65// TODO: "first class functions" via function pointers
22// TODO: convenient function calls per data type instead of methods: 66// TODO: convenient function calls per data type instead of methods:
23// fun add(a: int, b: int): int a + b 67// fun add(a: int, b: int): int a + b
24// add(12, 34) == 12.add(34) 68// add(12, 34) == 12.add(34)
25// concat(str, str): str 69// concat(str, str): str
26// "hello ".concat("world") ; "hello world" 70// "hello ".concat("world") ; "hello world"
27// TODO: more numeric types
28// TODO: structs and user defined types 71// TODO: structs and user defined types
29// TODO: tail-call optimization.
30// TODO: constant folding 72// TODO: constant folding
31// TODO: constexpr evaluation
32// TODO: shortcircuit evaluation for && and || operators.
33// TODO: casting on demand (1:u16, 0x123:ptr, "hi":int ??? how to deal with 73// TODO: casting on demand (1:u16, 0x123:ptr, "hi":int ??? how to deal with
34// unsafe casts?) 74// unsafe casts?)
75// TODO: ensure we can't assign a variable to itself with set.
76// TODO: in D you can 'import' from anywhere, and that means that
77// dependencies are constrained to the type / function, which end up improving
78// compile times: https://www.youtube.com/watch?v=WsgW4HJXEAg
79// TODO: if we have methods inside structs, these are just acting as namespaces,
80// not virtual functions, meaning we don't pass around a `this` pointer around.
81//
82// struct MyInt {
83// num: Int
84// fun adder(a: MyInt b: MyInt) -> MyInt {
85// a.num + b.num
86// }
87// }
88//
89// Can be called as Example.adder(1 2) but if we have a variable like so:
90//
91// let e: Example
92// let f: Example
93//
94// We can also use e.adder(f) and the first parameter will be passed
95// automagically.
96//
97// This works instead:
98// let x: S8 = 0xff
99// let y: S8 = 0xff
100// println(x == y)
101//
102// TODO: What do I want to have before moving forward to next steps:
103// Builtins: assert/sizeof
104// Slices
105// Function pointers
106// Malloc/Free
107// Open/close files and/or read_file
108// Switch/match statements
109// Enums
110// .syntax for pointers in structs: x.y where y is @
35 111
36typedef enum ExecMode { 112typedef enum ExecMode {
37 RUN_NORMAL, 113 RUN_NORMAL,
@@ -105,9 +181,6 @@ process_file(Str path) {
105 println("ROOT: %d", ctr++); 181 println("ROOT: %d", ctr++);
106#endif 182#endif
107 parse_expr(&parser, PREC_LOW); 183 parse_expr(&parser, PREC_LOW);
108 if (parser.panic) {
109 break;
110 }
111 } 184 }
112 parse_consume(&parser, TOK_EOF, cstr("expected end of file")); 185 parse_consume(&parser, TOK_EOF, cstr("expected end of file"));
113 if (parser.err) { 186 if (parser.err) {
@@ -128,99 +201,55 @@ process_file(Str path) {
128 }; 201 };
129 symbolic_analysis(&analyzer, &parser); 202 symbolic_analysis(&analyzer, &parser);
130 if (analyzer.err) { 203 if (analyzer.err) {
204 print_symbols(analyzer);
131 exit(EXIT_FAILURE); 205 exit(EXIT_FAILURE);
132 } 206 }
133 207
134 // Printing symbol tables. 208 // Printing symbol tables.
135 if (mode == PRINT_SYMTABLES) { 209 if (mode == PRINT_SYMTABLES) {
136 graph_types(analyzer.scopes, lexer_arena); 210 print_symbols(analyzer);
137 }
138 if (mode == PRINT_SEMANTIC) {
139 graph_ast(parser.nodes);
140 goto stop; 211 goto stop;
141 } 212 }
142
143#if DEBUG == 1 213#if DEBUG == 1
144 println("========== enums =========="); 214 else {
145 for (sz i = 0; i < array_size(analyzer.scopes); i++) { 215 print_symbols(analyzer);
146 Arena scratch = lexer_arena;
147 Scope *scope = analyzer.scopes[i];
148 EnumMapIter iter = enummap_iterator(scope->enums, &scratch);
149 EnumMap *m = enummap_next(&iter, &scratch);
150 while (m) {
151 println("scope: %x{2} -- %s: enum: %s", scope->id, path,
152 m->val.name);
153 m = enummap_next(&iter, &scratch);
154 }
155 }
156 println("========= structs =========");
157 for (sz i = 0; i < array_size(analyzer.scopes); i++) {
158 Arena scratch = lexer_arena;
159 Scope *scope = analyzer.scopes[i];
160 StructMapIter iter = structmap_iterator(scope->structs, &scratch);
161 StructMap *m = structmap_next(&iter, &scratch);
162 while (m) {
163 println("scope: %x{2} -- %s: struct: %s", scope->id, path,
164 m->val.name);
165 m = structmap_next(&iter, &scratch);
166 }
167 }
168 println("======== functions ========");
169 for (sz i = 0; i < array_size(analyzer.scopes); i++) {
170 Arena scratch = lexer_arena;
171 Scope *scope = analyzer.scopes[i];
172 FunMapIter iter = funmap_iterator(scope->funcs, &scratch);
173 FunMap *m = funmap_next(&iter, &scratch);
174 while (m) {
175 println("scope: %x{2} -- %s: func: %s(%s): (%s)", scope->id, path,
176 m->val.name, m->val.param_type, m->val.return_type);
177 m = funmap_next(&iter, &scratch);
178 }
179 }
180 println("========= symbols =========");
181 for (sz i = 0; i < array_size(analyzer.scopes); i++) {
182 Arena scratch = lexer_arena;
183 Scope *scope = analyzer.scopes[i];
184 SymbolMapIter iter = symmap_iterator(scope->symbols, &scratch);
185 SymbolMap *m = symmap_next(&iter, &scratch);
186 while (m) {
187 println("scope: %x{2} -- %s: %s %s: %s", scope->id, path,
188 sym_kind_str[m->val.kind], m->key, m->val.name);
189 m = symmap_next(&iter, &scratch);
190 }
191 } 216 }
192#endif 217#endif
218 if (mode == PRINT_SEMANTIC) {
219 graph_ast(parser.nodes);
220 goto stop;
221 }
193 222
194 // TODO: Type checking.
195
196 // Compile roots.
197#if DEBUG == 1 223#if DEBUG == 1
198 println("compilation..."); 224 println("compilation...");
199#endif 225#endif
226 // Compile roots.
200 Arena bytecode_arena = arena_create(LEXER_MEM, os_allocator); 227 Arena bytecode_arena = arena_create(LEXER_MEM, os_allocator);
201 Compiler compiler = { 228 Compiler compiler = {
202 .file_name = path, 229 .file_name = path,
203 .storage = &bytecode_arena, 230 .storage = &bytecode_arena,
204 .integer_types = analyzer.integer_types, 231 .lab_pre = -1,
205 .numeric_types = analyzer.numeric_types, 232 .lab_post = -1,
206 }; 233 };
207 bytecode_compiler(&compiler, parser); 234 bytecode_compiler(&compiler, analyzer);
208 disassemble_chunk(compiler.main_chunk); 235
236 for (sz i = 0; i < array_size(compiler.chunks); i++) {
237 disassemble_chunk(*compiler.chunks[i]);
238 }
209 239
210 // Run bytecode on VM. 240 // Run bytecode on VM.
211 VM vm = {0}; 241 VM vm = {0};
212 vm_init(&vm, &compiler.main_chunk); 242 vm_init(&vm, compiler.chunks);
213 vm_run(&vm); 243 vm_run(&vm);
214#if DEBUG == 1 244#if DEBUG == 1
215 println("MEMORY:\n%{Mem}", 245 println("MEMORY:\n%{Mem}", &(Array){.mem = (u8 *)&vm.stack, 64});
216 &(Array){.mem = (u8 *)&vm.stack, sizeof(vm.stack)});
217#endif 246#endif
218 247
219#if DEBUG == 1 248#if DEBUG == 1
220 println("Space used: %{Arena}", &lexer_arena); 249 println("Space used: %{Arena}", &lexer_arena);
221#endif 250#endif
222 251
223 // arena_destroy(&bytecode_arena, os_allocator); 252 arena_destroy(&bytecode_arena, os_allocator);
224stop: 253stop:
225 // Free up resources. 254 // Free up resources.
226 arena_destroy(&lexer_arena, os_allocator); 255 arena_destroy(&lexer_arena, os_allocator);
diff --git a/src/parser.c b/src/parser.c
index 5253841..a1a5924 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -62,12 +62,15 @@ typedef enum NodeKind {
62 NODE_FUNCALL, 62 NODE_FUNCALL,
63 NODE_RETURN, 63 NODE_RETURN,
64 // Helpers. 64 // Helpers.
65 NODE_SYMBOL_IDX,
66 NODE_TYPE, 65 NODE_TYPE,
67 NODE_COMPOUND_TYPE, 66 NODE_COMPOUND_TYPE,
68 NODE_ARR_TYPE,
69 NODE_FIELD, 67 NODE_FIELD,
70 NODE_BLOCK, 68 NODE_BLOCK,
69 NODE_PTR,
70 NODE_DEREF,
71 NODE_ARR,
72 NODE_INDEX,
73 NODE_DOT,
71} NodeKind; 74} NodeKind;
72 75
73Str node_str[] = { 76Str node_str[] = {
@@ -126,10 +129,13 @@ Str node_str[] = {
126 // Helpers. 129 // Helpers.
127 [NODE_TYPE] = cstr("TYPE"), 130 [NODE_TYPE] = cstr("TYPE"),
128 [NODE_COMPOUND_TYPE] = cstr("COMPOUND TYPE"), 131 [NODE_COMPOUND_TYPE] = cstr("COMPOUND TYPE"),
129 [NODE_ARR_TYPE] = cstr("TYPE (ARR)"),
130 [NODE_SYMBOL_IDX] = cstr("SYMBOL[IDX]"),
131 [NODE_FIELD] = cstr("FIELD"), 132 [NODE_FIELD] = cstr("FIELD"),
132 [NODE_BLOCK] = cstr("BLOCK"), 133 [NODE_BLOCK] = cstr("BLOCK"),
134 [NODE_PTR] = cstr("@"),
135 [NODE_DEREF] = cstr("DEREF"),
136 [NODE_ARR] = cstr("ARR"),
137 [NODE_INDEX] = cstr("INDEX"),
138 [NODE_DOT] = cstr("DOT"),
133}; 139};
134 140
135typedef union NodeLit { 141typedef union NodeLit {
@@ -193,10 +199,30 @@ typedef struct NodeFunction {
193 struct Node *body; 199 struct Node *body;
194} NodeFunction; 200} NodeFunction;
195 201
196typedef struct NodeSymbol { 202typedef struct NodeIndex {
197 struct Node *next; 203 struct Node *next;
198 struct Node *arr_size; 204 struct Node *value;
199} NodeSymbol; 205} NodeIndex;
206
207typedef struct NodeType {
208 struct Node *next;
209} NodeType;
210
211typedef struct NodeDeref {
212 struct Node *next;
213} NodeDeref;
214
215typedef enum {
216 NODE_ARR_STATIC,
217 NODE_ARR_DYNAMIC,
218 NODE_ARR_SLICE,
219} NodeArrKind;
220
221typedef struct NodeArr {
222 struct Node *next;
223 struct Node *size;
224 NodeArrKind kind;
225} NodeArr;
200 226
201typedef struct Node { 227typedef struct Node {
202 sz id; 228 sz id;
@@ -210,7 +236,6 @@ typedef struct Node {
210 NodeBinary binary; 236 NodeBinary binary;
211 NodeUnary unary; 237 NodeUnary unary;
212 NodeVariable var; 238 NodeVariable var;
213 NodeSymbol sym;
214 NodeLoop loop; 239 NodeLoop loop;
215 NodeIf ifelse; 240 NodeIf ifelse;
216 NodeField field; 241 NodeField field;
@@ -218,17 +243,21 @@ typedef struct Node {
218 NodeMatch match; 243 NodeMatch match;
219 NodeCase case_entry; 244 NodeCase case_entry;
220 NodeFunction func; 245 NodeFunction func;
246 NodeType t;
247 NodeIndex idx;
248 NodeDeref deref;
249 NodeArr array;
221 struct Node **struct_field; 250 struct Node **struct_field;
222 struct Node **elements; 251 struct Node **elements;
223 struct Node **statements; 252 struct Node **statements;
224 struct Node **expressions; 253 struct Node **expressions;
225 struct Node **arguments; 254 struct Node **arguments;
226 }; 255 };
227 bool is_ptr;
228 Str type; 256 Str type;
229 Str type_params; 257 Str type_params; // I hate these here tbh.
230 Str type_returns; 258 Str type_returns; // I hate these here tbh.
231 Str unique_name; 259 Str unique_name;
260 bool lvalue;
232} Node; 261} Node;
233 262
234// 263//
@@ -254,12 +283,12 @@ typedef struct Parser {
254typedef enum { 283typedef enum {
255 PREC_NONE = 0, 284 PREC_NONE = 0,
256 PREC_LOW, // lowest precedence 285 PREC_LOW, // lowest precedence
257 PREC_BITLOGIC, // & | 286 PREC_OR, // or
258 PREC_BITSHIFT, // << >> 287 PREC_AND, // and
259 PREC_OR, // ||
260 PREC_AND, // &&
261 PREC_EQUALITY, // == != 288 PREC_EQUALITY, // == !=
262 PREC_COMPARISON, // < > <= >= 289 PREC_COMPARISON, // < > <= >=
290 PREC_BITLOGIC, // & |
291 PREC_BITSHIFT, // << >>
263 PREC_TERM, // + - 292 PREC_TERM, // + -
264 PREC_FACTOR, // * / % 293 PREC_FACTOR, // * / %
265 PREC_UNARY, // ! - 294 PREC_UNARY, // ! -
@@ -289,11 +318,13 @@ void parse_symbol(Parser *parser);
289void parse_keyword(Parser *parser); 318void parse_keyword(Parser *parser);
290void parse_type(Parser *parser); 319void parse_type(Parser *parser);
291void parse_block(Parser *parser); 320void parse_block(Parser *parser);
321void parse_dot(Parser *parser);
292 322
293ParseRule parse_rules[] = { 323ParseRule parse_rules[] = {
294 [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE}, 324 [TOK_LPAREN] = {parse_grouping, NULL, PREC_NONE},
295 [TOK_LCURLY] = {parse_block, NULL, PREC_NONE}, 325 [TOK_LCURLY] = {parse_block, NULL, PREC_NONE},
296 [TOK_AT] = {parse_symbol, NULL, PREC_NONE}, 326 [TOK_AT] = {parse_symbol, NULL, PREC_NONE},
327 [TOK_DOT] = {NULL, parse_dot, PREC_CALL},
297 328
298 // Arithmetic. 329 // Arithmetic.
299 [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM}, 330 [TOK_SUB] = {parse_unary, parse_binary, PREC_TERM},
@@ -351,7 +382,6 @@ ParseRule parse_rules[] = {
351 382
352Node * 383Node *
353node_alloc(Parser *parser, NodeKind kind, Token tok) { 384node_alloc(Parser *parser, NodeKind kind, Token tok) {
354 if (parser->panic) return NULL;
355 static sz id = 0; 385 static sz id = 0;
356 Node *node = arena_calloc((sz)sizeof(Node), parser->storage); 386 Node *node = arena_calloc((sz)sizeof(Node), parser->storage);
357 node->id = id++; 387 node->id = id++;
@@ -421,20 +451,44 @@ parse_block(Parser *parser) {
421 print_token(parser->previous); 451 print_token(parser->previous);
422#endif 452#endif
423 Node *block = node_alloc(parser, NODE_BLOCK, parser->previous); 453 Node *block = node_alloc(parser, NODE_BLOCK, parser->previous);
424 if (!block) return;
425 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 454 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
426 parse_expr(parser, PREC_LOW); 455 parse_expr(parser, PREC_LOW);
427 Node *next = array_pop(parser->nodes); 456 Node *next = array_pop(parser->nodes);
457 next->parent = block;
428 array_push(block->statements, next, parser->storage); 458 array_push(block->statements, next, parser->storage);
429 } 459 }
430 array_push(parser->nodes, block, parser->storage); 460 array_push(parser->nodes, block, parser->storage);
431} 461}
432 462
433void 463void
464parser_sync(Parser *parser) {
465 parser->panic = false;
466 while (parser->current.kind != TOK_EOF) {
467 switch (parser->current.kind) {
468 case TOK_FUN:
469 case TOK_LET:
470 case TOK_SET:
471 case TOK_STRUCT:
472 case TOK_ENUM:
473 case TOK_FOR:
474 case TOK_IF:
475 case TOK_WHILE:
476 case TOK_BREAK:
477 case TOK_CONTINUE:
478 case TOK_COND:
479 case TOK_MATCH:
480 case TOK_CASE:
481 case TOK_RETURN: return;
482 default: break;
483 }
484 parse_advance(parser);
485 }
486}
487
488void
434parse_expr(Parser *parser, ParsePrecedence precedence) { 489parse_expr(Parser *parser, ParsePrecedence precedence) {
435 parse_advance(parser); 490 parse_advance(parser);
436 // TODO: synchronize panic mode on keywords. 491 if (parser->panic) parser_sync(parser);
437 if (parser->panic) return;
438 ParseFn prefix = parse_rules[parser->previous.kind].prefix; 492 ParseFn prefix = parse_rules[parser->previous.kind].prefix;
439 if (prefix == NULL) { 493 if (prefix == NULL) {
440 parse_emit_err(parser, parser->previous, cstr("expected expression")); 494 parse_emit_err(parser, parser->previous, cstr("expected expression"));
@@ -469,8 +523,8 @@ parse_unary(Parser *parser) {
469 case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break; 523 case TOK_BITNOT: node = node_alloc(parser, NODE_BITNOT, prev); break;
470 default: break; // Unreachable. 524 default: break; // Unreachable.
471 } 525 }
472 if (!node) return;
473 node->unary.left = array_pop(parser->nodes); 526 node->unary.left = array_pop(parser->nodes);
527 node->unary.left->parent = node;
474 array_push(parser->nodes, node, parser->storage); 528 array_push(parser->nodes, node, parser->storage);
475} 529}
476 530
@@ -492,7 +546,6 @@ parse_literal(Parser *parser) {
492 case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break; 546 case TOK_NIL: node = node_alloc(parser, NODE_NIL, prev); break;
493 default: return; // Unreachable. 547 default: return; // Unreachable.
494 } 548 }
495 if (!node) return;
496 array_push(parser->nodes, node, parser->storage); 549 array_push(parser->nodes, node, parser->storage);
497} 550}
498 551
@@ -504,21 +557,50 @@ parse_type(Parser *parser) {
504 print_token(prev); 557 print_token(prev);
505#endif 558#endif
506 Node *node = node_alloc(parser, NODE_TYPE, prev); 559 Node *node = node_alloc(parser, NODE_TYPE, prev);
507 if (!node) return; 560 Node *child = node;
508 if (parse_match(parser, TOK_AT)) { 561 while (parse_match(parser, TOK_AT) || parse_match(parser, TOK_LSQUARE)) {
509 node->is_ptr = true; 562 switch (parser->previous.kind) {
563 case TOK_AT: {
564 Node *ptr_node = node_alloc(parser, NODE_PTR, parser->previous);
565 ptr_node->parent = child;
566 child->t.next = ptr_node;
567 child = ptr_node;
568 } break;
569 case TOK_LSQUARE: {
570 Node *ptr_node = node_alloc(parser, NODE_ARR, parser->previous);
571 if (parse_match(parser, TOK_NUM_INT)) {
572 // Static array.
573 parse_number(parser);
574 ptr_node->array.size = array_pop(parser->nodes);
575 ptr_node->array.kind = NODE_ARR_STATIC;
576 ptr_node->array.size->parent = ptr_node;
577 parse_consume(parser, TOK_RSQUARE,
578 cstr("unmatched brackets ']' in array type"));
579 } else if (parse_match(parser, TOK_RSQUARE)) {
580 // Slice.
581 ptr_node->array.kind = NODE_ARR_SLICE;
582 } else {
583 // Dynamic array.
584 parse_consume(parser, TOK_DOT, cstr("invalid array type"));
585 parse_consume(parser, TOK_DOT, cstr("invalid array type"));
586 parse_consume(parser, TOK_DOT, cstr("invalid array type"));
587 parse_consume(parser, TOK_RSQUARE,
588 cstr("unmatched brackets ']' in array type"));
589 ptr_node->array.kind = NODE_ARR_DYNAMIC;
590 }
591 ptr_node->parent = child;
592 child->t.next = ptr_node;
593 child = ptr_node;
594 } break;
595 default: {
596 parse_emit_err(parser, prev, cstr("unimplemented"));
597 } break;
598 }
510 } 599 }
511 parse_consume(parser, TOK_SYMBOL, cstr("no type given for struct field")); 600 // TODO: maps
601 // TODO: function pointer syntax: : (T T : R)
602 parse_consume(parser, TOK_SYMBOL, cstr("expected type name"));
512 node->value.sym = parser->previous.val; 603 node->value.sym = parser->previous.val;
513 // Optional array value?
514 if (parse_match(parser, TOK_LSQUARE)) {
515 node->kind = NODE_ARR_TYPE,
516 parse_consume(parser, TOK_NUM_INT, cstr("no array size given"));
517 parse_number(parser);
518 node->sym.arr_size = array_pop(parser->nodes);
519 parse_consume(parser, TOK_RSQUARE,
520 cstr("unmatched brackets ']' in array type"));
521 }
522 array_push(parser->nodes, node, parser->storage); 604 array_push(parser->nodes, node, parser->storage);
523} 605}
524 606
@@ -530,29 +612,31 @@ parse_struct_field(Parser *parser) {
530 print_token(parser->previous); 612 print_token(parser->previous);
531#endif 613#endif
532 Node *field = node_alloc(parser, NODE_FIELD, parser->current); 614 Node *field = node_alloc(parser, NODE_FIELD, parser->current);
533 if (!field) return;
534 parse_consume(parser, TOK_SYMBOL, 615 parse_consume(parser, TOK_SYMBOL,
535 cstr("expected symbol name on struct field")); 616 cstr("expected symbol name on struct field"));
536 field->value.sym = parser->previous.val; 617 field->value.sym = parser->previous.val;
537 parse_consume(parser, TOK_COLON, cstr("expected type in struct field")); 618 parse_consume(parser, TOK_COLON, cstr("expected type in struct field"));
538 if (parse_match(parser, TOK_LCURLY)) { 619 if (parse_match(parser, TOK_LCURLY)) {
539 Node *type = node_alloc(parser, NODE_COMPOUND_TYPE, parser->current); 620 Node *type = node_alloc(parser, NODE_COMPOUND_TYPE, parser->current);
540 if (!type) return; 621 type->parent = field;
541 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 622 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
542 parse_struct_field(parser); 623 parse_struct_field(parser);
543 Node *subfield = array_pop(parser->nodes); 624 Node *subfield = array_pop(parser->nodes);
625 subfield->parent = type;
544 array_push(type->elements, subfield, parser->storage); 626 array_push(type->elements, subfield, parser->storage);
545 } 627 }
546 field->field.type = type; 628 field->field.type = type;
547 } else { 629 } else {
548 parse_type(parser); 630 parse_type(parser);
549 field->field.type = array_pop(parser->nodes); 631 field->field.type = array_pop(parser->nodes);
632 field->field.type->parent = field;
550 } 633 }
551 634
552 // Optional assignment. 635 // Optional assignment.
553 if (parse_match(parser, TOK_ASSIGN)) { 636 if (parse_match(parser, TOK_ASSIGN)) {
554 parse_expr(parser, PREC_LOW); 637 parse_expr(parser, PREC_LOW);
555 field->field.val = array_pop(parser->nodes); 638 field->field.val = array_pop(parser->nodes);
639 field->field.val->parent = field;
556 } 640 }
557 array_push(parser->nodes, field, parser->storage); 641 array_push(parser->nodes, field, parser->storage);
558} 642}
@@ -565,7 +649,6 @@ parse_struct_lit_field(Parser *parser) {
565 print_token(parser->previous); 649 print_token(parser->previous);
566#endif 650#endif
567 Node *field = node_alloc(parser, NODE_FIELD, parser->current); 651 Node *field = node_alloc(parser, NODE_FIELD, parser->current);
568 if (!field) return;
569 parse_consume(parser, TOK_SYMBOL, 652 parse_consume(parser, TOK_SYMBOL,
570 cstr("expected symbol name on struct field")); 653 cstr("expected symbol name on struct field"));
571 field->value.sym = parser->previous.val; 654 field->value.sym = parser->previous.val;
@@ -573,16 +656,18 @@ parse_struct_lit_field(Parser *parser) {
573 cstr("expected field value on struct literal")); 656 cstr("expected field value on struct literal"));
574 if (parse_match(parser, TOK_LCURLY)) { 657 if (parse_match(parser, TOK_LCURLY)) {
575 Node *type = node_alloc(parser, NODE_COMPOUND_TYPE, parser->current); 658 Node *type = node_alloc(parser, NODE_COMPOUND_TYPE, parser->current);
576 if (!type) return; 659 type->parent = field;
577 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 660 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
578 parse_struct_lit_field(parser); 661 parse_struct_lit_field(parser);
579 Node *subfield = array_pop(parser->nodes); 662 Node *subfield = array_pop(parser->nodes);
663 subfield->parent = type;
580 array_push(type->elements, subfield, parser->storage); 664 array_push(type->elements, subfield, parser->storage);
581 } 665 }
582 field->field.val = type; 666 field->field.val = type;
583 } else { 667 } else {
584 parse_expr(parser, PREC_LOW); 668 parse_expr(parser, PREC_LOW);
585 field->field.val = array_pop(parser->nodes); 669 field->field.val = array_pop(parser->nodes);
670 field->field.val->parent = field;
586 } 671 }
587 array_push(parser->nodes, field, parser->storage); 672 array_push(parser->nodes, field, parser->storage);
588} 673}
@@ -599,27 +684,24 @@ parse_keyword(Parser *parser) {
599 switch (prev.kind) { 684 switch (prev.kind) {
600 case TOK_LET: { 685 case TOK_LET: {
601 node = node_alloc(parser, NODE_LET, prev); 686 node = node_alloc(parser, NODE_LET, prev);
602 if (!node) return;
603 parse_consume(parser, TOK_SYMBOL, 687 parse_consume(parser, TOK_SYMBOL,
604 cstr("expected symbol name on let expression")); 688 cstr("expected symbol name on let expression"));
605 parse_symbol(parser); 689 parse_symbol(parser);
606 node->var.name = array_pop(parser->nodes); 690 node->var.name = array_pop(parser->nodes);
607 if (node->var.name->sym.next) { 691 node->var.name->parent = node;
608 parse_emit_err(parser, prev,
609 cstr("invalid symbol name in let expression"));
610 return;
611 }
612 692
613 // Optional type declaration. 693 // Optional type declaration.
614 if (parse_match(parser, TOK_COLON)) { 694 if (parse_match(parser, TOK_COLON)) {
615 parse_type(parser); 695 parse_type(parser);
616 node->var.type = array_pop(parser->nodes); 696 node->var.type = array_pop(parser->nodes);
697 node->var.type->parent = node;
617 } 698 }
618 699
619 // Optional assignment. 700 // Optional assignment.
620 if (parse_match(parser, TOK_ASSIGN)) { 701 if (parse_match(parser, TOK_ASSIGN)) {
621 parse_expr(parser, PREC_LOW); 702 parse_expr(parser, PREC_LOW);
622 node->var.val = array_pop(parser->nodes); 703 node->var.val = array_pop(parser->nodes);
704 node->var.val->parent = node;
623 } 705 }
624 706
625 if (node->var.type == NULL && node->var.val == NULL) { 707 if (node->var.type == NULL && node->var.val == NULL) {
@@ -630,11 +712,16 @@ parse_keyword(Parser *parser) {
630 } break; 712 } break;
631 case TOK_SET: { 713 case TOK_SET: {
632 node = node_alloc(parser, NODE_SET, prev); 714 node = node_alloc(parser, NODE_SET, prev);
633 if (!node) return; 715 parse_expr(parser, PREC_LOW);
634 parse_consume(parser, TOK_SYMBOL,
635 cstr("expected symbol name on set expression"));
636 parse_symbol(parser);
637 node->var.name = array_pop(parser->nodes); 716 node->var.name = array_pop(parser->nodes);
717 node->var.name->parent = node;
718 if (node->var.name->kind != NODE_DOT &&
719 node->var.name->kind != NODE_SYMBOL &&
720 node->var.name->kind != NODE_DEREF &&
721 node->var.name->kind != NODE_INDEX) {
722 parse_emit_err(parser, prev,
723 cstr("expected identifier on set expression"));
724 }
638 725
639 if (parse_match(parser, TOK_ADD_ASSIGN) || 726 if (parse_match(parser, TOK_ADD_ASSIGN) ||
640 parse_match(parser, TOK_ADD_ASSIGN) || 727 parse_match(parser, TOK_ADD_ASSIGN) ||
@@ -667,7 +754,10 @@ parse_keyword(Parser *parser) {
667 Node *op = node_alloc(parser, kind, prev); 754 Node *op = node_alloc(parser, kind, prev);
668 op->binary.left = sym; 755 op->binary.left = sym;
669 op->binary.right = value; 756 op->binary.right = value;
757 sym->parent = op;
758 value->parent = op;
670 node->var.val = op; 759 node->var.val = op;
760 node->var.val->parent = node;
671 sym->value = node->var.name->value; 761 sym->value = node->var.name->value;
672 sym->kind = node->var.name->kind; 762 sym->kind = node->var.name->kind;
673 } else { 763 } else {
@@ -679,7 +769,6 @@ parse_keyword(Parser *parser) {
679 } break; 769 } break;
680 case TOK_STRUCT: { 770 case TOK_STRUCT: {
681 node = node_alloc(parser, NODE_STRUCT, prev); 771 node = node_alloc(parser, NODE_STRUCT, prev);
682 if (!node) return;
683 parse_consume(parser, TOK_SYMBOL, 772 parse_consume(parser, TOK_SYMBOL,
684 cstr("expected symbol name on struct definition")); 773 cstr("expected symbol name on struct definition"));
685 // Just consume this to avoid conflicts with struct literals. 774 // Just consume this to avoid conflicts with struct literals.
@@ -691,54 +780,53 @@ parse_keyword(Parser *parser) {
691 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 780 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
692 parse_struct_field(parser); 781 parse_struct_field(parser);
693 Node *field = array_pop(parser->nodes); 782 Node *field = array_pop(parser->nodes);
694 783 field->parent = node;
695 array_push(node->struct_field, field, parser->storage); 784 array_push(node->struct_field, field, parser->storage);
696 } 785 }
697 } break; 786 } break;
698 case TOK_IF: { 787 case TOK_IF: {
699 node = node_alloc(parser, NODE_IF, prev); 788 node = node_alloc(parser, NODE_IF, prev);
700 if (!node) return;
701 parse_expr(parser, PREC_LOW); 789 parse_expr(parser, PREC_LOW);
702 node->ifelse.cond = array_pop(parser->nodes); 790 node->ifelse.cond = array_pop(parser->nodes);
791 node->ifelse.cond->parent = node;
703 parse_expr(parser, PREC_LOW); 792 parse_expr(parser, PREC_LOW);
704 node->ifelse.expr_true = array_pop(parser->nodes); 793 node->ifelse.expr_true = array_pop(parser->nodes);
794 node->ifelse.expr_true->parent = node;
705 if (parse_match(parser, TOK_ELSE)) { 795 if (parse_match(parser, TOK_ELSE)) {
706 parse_expr(parser, PREC_LOW); 796 parse_expr(parser, PREC_LOW);
707 node->ifelse.expr_else = array_pop(parser->nodes); 797 node->ifelse.expr_else = array_pop(parser->nodes);
798 node->ifelse.expr_else->parent = node;
708 } 799 }
709 } break; 800 } break;
710 case TOK_MATCH: { 801 case TOK_MATCH: {
711 node = node_alloc(parser, NODE_MATCH, prev); 802 node = node_alloc(parser, NODE_MATCH, prev);
712 if (!node) return;
713 parse_expr(parser, PREC_LOW); 803 parse_expr(parser, PREC_LOW);
714 node->match.expr = array_pop(parser->nodes); 804 node->match.expr = array_pop(parser->nodes);
805 node->match.expr->parent = node;
715 parse_consume(parser, TOK_LCURLY, 806 parse_consume(parser, TOK_LCURLY,
716 cstr("expected block of match cases")); 807 cstr("expected block of match cases"));
717 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 808 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
718 Node *tmp = 809 Node *tmp =
719 node_alloc(parser, NODE_CASE_MATCH, parser->previous); 810 node_alloc(parser, NODE_CASE_MATCH, parser->previous);
720 if (!tmp) return; 811 tmp->parent = node;
721 // Are we on the default case. 812 // Are we on the default case.
722 if (!parse_match(parser, TOK_ELSE)) { 813 if (!parse_match(parser, TOK_ELSE)) {
723 parse_consume(parser, TOK_CASE, 814 parse_consume(parser, TOK_CASE,
724 cstr("expected case statement")); 815 cstr("expected case statement"));
725 parse_expr(parser, PREC_LOW); 816 parse_expr(parser, PREC_LOW);
726 tmp->case_entry.cond = array_pop(parser->nodes); 817 tmp->case_entry.cond = array_pop(parser->nodes);
818 tmp->case_entry.cond->parent = tmp;
727 } 819 }
728 parse_consume(parser, TOK_ASSIGN, 820 parse_consume(parser, TOK_ASSIGN,
729 cstr("malformed case statement")); 821 cstr("malformed case statement"));
730 parse_expr(parser, PREC_LOW); 822 parse_expr(parser, PREC_LOW);
731 tmp->case_entry.expr = array_pop(parser->nodes); 823 tmp->case_entry.expr = array_pop(parser->nodes);
824 tmp->case_entry.expr->parent = tmp;
732 array_push(node->match.cases, tmp, parser->storage); 825 array_push(node->match.cases, tmp, parser->storage);
733 } 826 }
734 // TODO: Check that we only have literals on the match case,
735 // this could be done on the analysis step, but also here...
736 // TODO: Check that there are no multiple default or duplicated
737 // cases.
738 } break; 827 } break;
739 case TOK_ENUM: { 828 case TOK_ENUM: {
740 node = node_alloc(parser, NODE_ENUM, prev); 829 node = node_alloc(parser, NODE_ENUM, prev);
741 if (!node) return;
742 parse_consume(parser, TOK_SYMBOL, 830 parse_consume(parser, TOK_SYMBOL,
743 cstr("expected symbol name on enum definition")); 831 cstr("expected symbol name on enum definition"));
744 // Just consume this to avoid conflicts with struct literals. 832 // Just consume this to avoid conflicts with struct literals.
@@ -747,13 +835,14 @@ parse_keyword(Parser *parser) {
747 cstr("expected '{' on enum definition")); 835 cstr("expected '{' on enum definition"));
748 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 836 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
749 Node *field = node_alloc(parser, NODE_FIELD, parser->current); 837 Node *field = node_alloc(parser, NODE_FIELD, parser->current);
750 if (!field) return; 838 field->parent = node;
751 parse_consume(parser, TOK_SYMBOL, 839 parse_consume(parser, TOK_SYMBOL,
752 cstr("expected symbol name on enum definition")); 840 cstr("expected symbol name on enum definition"));
753 field->value.sym = parser->previous.val; 841 field->value.sym = parser->previous.val;
754 if (parse_match(parser, TOK_ASSIGN)) { 842 if (parse_match(parser, TOK_ASSIGN)) {
755 parse_expr(parser, PREC_LOW); 843 parse_expr(parser, PREC_LOW);
756 field->field.val = array_pop(parser->nodes); 844 field->field.val = array_pop(parser->nodes);
845 field->field.val->parent = field;
757 } 846 }
758 array_push(node->struct_field, field, parser->storage); 847 array_push(node->struct_field, field, parser->storage);
759 } 848 }
@@ -763,50 +852,51 @@ parse_keyword(Parser *parser) {
763 } break; 852 } break;
764 case TOK_COND: { 853 case TOK_COND: {
765 node = node_alloc(parser, NODE_COND, prev); 854 node = node_alloc(parser, NODE_COND, prev);
766 if (!node) return;
767 parse_consume(parser, TOK_LCURLY, 855 parse_consume(parser, TOK_LCURLY,
768 cstr("expected '{' on cond expression")); 856 cstr("expected '{' on cond expression"));
769 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 857 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
770 Node *tmp = 858 Node *tmp =
771 node_alloc(parser, NODE_CASE_COND, parser->previous); 859 node_alloc(parser, NODE_CASE_COND, parser->previous);
772 if (!tmp) return; 860 tmp->parent = node;
773 // Are we on the default case. 861 // Are we on the default case.
774 if (!parse_match(parser, TOK_ELSE)) { 862 if (!parse_match(parser, TOK_ELSE)) {
775 parse_expr(parser, PREC_LOW); 863 parse_expr(parser, PREC_LOW);
776 tmp->case_entry.cond = array_pop(parser->nodes); 864 tmp->case_entry.cond = array_pop(parser->nodes);
865 tmp->case_entry.cond->parent = tmp;
777 } 866 }
778 parse_consume(parser, TOK_ASSIGN, 867 parse_consume(parser, TOK_ASSIGN,
779 cstr("malformed case statement")); 868 cstr("malformed case statement"));
780 parse_expr(parser, PREC_LOW); 869 parse_expr(parser, PREC_LOW);
781 tmp->case_entry.expr = array_pop(parser->nodes); 870 tmp->case_entry.expr = array_pop(parser->nodes);
871 tmp->case_entry.expr->parent = tmp;
782 array_push(node->match.cases, tmp, parser->storage); 872 array_push(node->match.cases, tmp, parser->storage);
783 } 873 }
784 } break; 874 } break;
785 case TOK_BREAK: { 875 case TOK_BREAK: {
786 node = node_alloc(parser, NODE_BREAK, prev); 876 node = node_alloc(parser, NODE_BREAK, prev);
787 if (!node) return;
788 } break; 877 } break;
789 case TOK_CONTINUE: { 878 case TOK_CONTINUE: {
790 node = node_alloc(parser, NODE_CONTINUE, prev); 879 node = node_alloc(parser, NODE_CONTINUE, prev);
791 if (!node) return;
792 } break; 880 } break;
793 case TOK_FOR: { 881 case TOK_FOR: {
794 node = node_alloc(parser, NODE_BLOCK, prev); 882 node = node_alloc(parser, NODE_BLOCK, prev);
795 if (!node) return;
796 883
797 Node *node_while = node_alloc(parser, NODE_WHILE, prev); 884 Node *node_while = node_alloc(parser, NODE_WHILE, prev);
798 if (!node_while) return; 885 node_while->parent = node;
799 Node *block = node_alloc(parser, NODE_BLOCK, prev); 886 Node *block = node_alloc(parser, NODE_BLOCK, prev);
800 if (!block) return; 887 block->parent = node_while;
801 888
802 parse_expr(parser, PREC_LOW); 889 parse_expr(parser, PREC_LOW);
803 Node *pre = array_pop(parser->nodes); 890 Node *pre = array_pop(parser->nodes);
891 pre->parent = node;
804 892
805 parse_expr(parser, PREC_LOW); 893 parse_expr(parser, PREC_LOW);
806 Node *cond = array_pop(parser->nodes); 894 Node *cond = array_pop(parser->nodes);
895 cond->parent = node_while;
807 896
808 parse_expr(parser, PREC_LOW); 897 parse_expr(parser, PREC_LOW);
809 Node *post = array_pop(parser->nodes); 898 Node *post = array_pop(parser->nodes);
899 post->parent = block;
810 900
811 // Body. 901 // Body.
812 parse_consume(parser, TOK_LCURLY, 902 parse_consume(parser, TOK_LCURLY,
@@ -814,6 +904,7 @@ parse_keyword(Parser *parser) {
814 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 904 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
815 parse_expr(parser, PREC_LOW); 905 parse_expr(parser, PREC_LOW);
816 Node *next = array_pop(parser->nodes); 906 Node *next = array_pop(parser->nodes);
907 next->parent = block;
817 array_push(block->statements, next, parser->storage); 908 array_push(block->statements, next, parser->storage);
818 } 909 }
819 array_push(block->statements, post, parser->storage); 910 array_push(block->statements, post, parser->storage);
@@ -826,18 +917,18 @@ parse_keyword(Parser *parser) {
826 } break; 917 } break;
827 case TOK_WHILE: { 918 case TOK_WHILE: {
828 node = node_alloc(parser, NODE_WHILE, prev); 919 node = node_alloc(parser, NODE_WHILE, prev);
829 if (!node) return;
830 parse_expr(parser, PREC_LOW); 920 parse_expr(parser, PREC_LOW);
831 node->loop.cond = array_pop(parser->nodes); 921 node->loop.cond = array_pop(parser->nodes);
922 node->loop.cond->parent = node;
832 parse_expr(parser, PREC_LOW); 923 parse_expr(parser, PREC_LOW);
833 node->loop.expr = array_pop(parser->nodes); 924 node->loop.expr = array_pop(parser->nodes);
925 node->loop.expr->parent = node;
834 } break; 926 } break;
835 case TOK_FUN: { 927 case TOK_FUN: {
836 node = node_alloc(parser, NODE_FUN, prev); 928 node = node_alloc(parser, NODE_FUN, prev);
837 if (!node) return;
838 parse_consume(parser, TOK_SYMBOL, cstr("expected function name")); 929 parse_consume(parser, TOK_SYMBOL, cstr("expected function name"));
839 Node *name = node_alloc(parser, NODE_SYMBOL, prev); 930 Node *name = node_alloc(parser, NODE_SYMBOL, prev);
840 if (!name) return; 931 name->parent = node;
841 name->value.sym = parser->previous.val; 932 name->value.sym = parser->previous.val;
842 node->func.name = name; 933 node->func.name = name;
843 parse_consume(parser, TOK_LPAREN, 934 parse_consume(parser, TOK_LPAREN,
@@ -846,80 +937,33 @@ parse_keyword(Parser *parser) {
846 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) { 937 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) {
847 Node *param = 938 Node *param =
848 node_alloc(parser, NODE_FUN_PARAM, parser->current); 939 node_alloc(parser, NODE_FUN_PARAM, parser->current);
849 if (!param) return; 940 param->parent = node;
941
942 // Name.
850 Node *name = node_alloc(parser, NODE_SYMBOL, prev); 943 Node *name = node_alloc(parser, NODE_SYMBOL, prev);
851 if (!name) return;
852 parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name")); 944 parse_consume(parser, TOK_SYMBOL, cstr("expected symbol name"));
853 name->value.sym = parser->previous.val; 945 name->value.sym = parser->previous.val;
854 param->param.name = name;
855 946
856 Node *type = node_alloc(parser, NODE_TYPE, prev); 947 // Type.
857 if (!type) return;
858 parse_consume(parser, TOK_COLON, cstr("expected param type")); 948 parse_consume(parser, TOK_COLON, cstr("expected param type"));
859 if (parse_match(parser, TOK_AT)) { 949 parse_type(parser);
860 type->is_ptr = true; 950
861 } 951 // Put everything together.
862 parse_consume(parser, TOK_SYMBOL, cstr("expected param type")); 952 param->param.name = name;
863 type->value.sym = parser->previous.val; 953 param->param.type = array_pop(parser->nodes);
864 param->param.type = type; 954 param->param.name->parent = param;
865 if (parse_match(parser, TOK_LSQUARE)) { 955 param->param.type->parent = param;
866 type->kind = NODE_ARR_TYPE,
867 parse_consume(parser, TOK_NUM_INT,
868 cstr("no array size given"));
869 parse_number(parser);
870 type->sym.arr_size = array_pop(parser->nodes);
871 parse_consume(parser, TOK_RSQUARE,
872 cstr("unmatched brackets ']' in array type"));
873 }
874 array_push(node->func.params, param, parser->storage); 956 array_push(node->func.params, param, parser->storage);
875 } 957 }
876 parse_consume(parser, TOK_COLON, cstr("expected param type"));
877 958
878 // Return type(s). 959 // Return type(s).
879 if (!parse_match(parser, TOK_NIL)) { 960 // NOTE: We are setup here for multiple return values, but we are
880 if (parse_match(parser, TOK_LPAREN)) { 961 // currently only considering a single one for simplicity.
881 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) { 962 if (parse_match(parser, TOK_ARROW)) {
882 Node *ret = node_alloc(parser, NODE_TYPE, prev); 963 parse_type(parser);
883 if (!ret) return; 964 Node *ret = array_pop(parser->nodes);
884 if (parse_match(parser, TOK_AT)) { 965 ret->parent = node;
885 ret->is_ptr = true; 966 array_push(node->func.ret, ret, parser->storage);
886 }
887 parse_consume(parser, TOK_SYMBOL,
888 cstr("expected type name"));
889 ret->value.sym = parser->previous.val;
890 if (parse_match(parser, TOK_LSQUARE)) {
891 ret->kind = NODE_ARR_TYPE,
892 parse_consume(parser, TOK_NUM_INT,
893 cstr("no array size given"));
894 parse_number(parser);
895 ret->sym.arr_size = array_pop(parser->nodes);
896 parse_consume(parser, TOK_RSQUARE,
897 cstr("unmatched brackets ']' in "
898 "array type"));
899 }
900 array_push(node->func.ret, ret, parser->storage);
901 }
902 } else {
903 Node *ret = node_alloc(parser, NODE_TYPE, prev);
904 if (!ret) return;
905 if (parse_match(parser, TOK_AT)) {
906 ret->is_ptr = true;
907 }
908 parse_consume(parser, TOK_SYMBOL,
909 cstr("expected type name"));
910 ret->value.sym = parser->previous.val;
911 if (parse_match(parser, TOK_LSQUARE)) {
912 ret->kind = NODE_ARR_TYPE,
913 parse_consume(parser, TOK_NUM_INT,
914 cstr("no array size given"));
915 parse_number(parser);
916 ret->sym.arr_size = array_pop(parser->nodes);
917 parse_consume(
918 parser, TOK_RSQUARE,
919 cstr("unmatched brackets ']' in array type"));
920 }
921 array_push(node->func.ret, ret, parser->storage);
922 }
923 } 967 }
924 968
925 // Body. 969 // Body.
@@ -928,13 +972,13 @@ parse_keyword(Parser *parser) {
928 } break; 972 } break;
929 case TOK_RETURN: { 973 case TOK_RETURN: {
930 node = node_alloc(parser, NODE_RETURN, prev); 974 node = node_alloc(parser, NODE_RETURN, prev);
931 if (!node) return;
932 parse_consume(parser, TOK_LPAREN, 975 parse_consume(parser, TOK_LPAREN,
933 cstr("expected '(' after return")); 976 cstr("expected '(' after return"));
934 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) { 977 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) {
935 parse_expr(parser, PREC_LOW); 978 parse_expr(parser, PREC_LOW);
936 array_push(node->expressions, array_pop(parser->nodes), 979 Node *val = array_pop(parser->nodes);
937 parser->storage); 980 val->parent = node;
981 array_push(node->expressions, val, parser->storage);
938 } 982 }
939 } break; 983 } break;
940 default: return; // Unreachable. 984 default: return; // Unreachable.
@@ -988,9 +1032,10 @@ parse_binary(Parser *parser) {
988 return; 1032 return;
989 } 1033 }
990 } 1034 }
991 if (!node) return;
992 node->binary.right = array_pop(parser->nodes); 1035 node->binary.right = array_pop(parser->nodes);
993 node->binary.left = array_pop(parser->nodes); 1036 node->binary.left = array_pop(parser->nodes);
1037 node->binary.right->parent = node;
1038 node->binary.left->parent = node;
994 array_push(parser->nodes, node, parser->storage); 1039 array_push(parser->nodes, node, parser->storage);
995} 1040}
996 1041
@@ -1008,22 +1053,18 @@ parse_number(Parser *parser) {
1008 if (str_has_prefix(prev.val, cstr("0x")) || 1053 if (str_has_prefix(prev.val, cstr("0x")) ||
1009 str_has_prefix(prev.val, cstr("0b"))) { 1054 str_has_prefix(prev.val, cstr("0b"))) {
1010 node = node_alloc(parser, NODE_NUM_UINT, prev); 1055 node = node_alloc(parser, NODE_NUM_UINT, prev);
1011 if (!node) return;
1012 node->value.u = str_to_uint(prev.val); 1056 node->value.u = str_to_uint(prev.val);
1013 } else { 1057 } else {
1014 node = node_alloc(parser, NODE_NUM_INT, prev); 1058 node = node_alloc(parser, NODE_NUM_INT, prev);
1015 if (!node) return;
1016 node->value.i = str_to_int(prev.val); 1059 node->value.i = str_to_int(prev.val);
1017 } 1060 }
1018 } break; 1061 } break;
1019 case TOK_NUM_FLOAT: { 1062 case TOK_NUM_FLOAT: {
1020 node = node_alloc(parser, NODE_NUM_FLOAT, prev); 1063 node = node_alloc(parser, NODE_NUM_FLOAT, prev);
1021 if (!node) return;
1022 node->value.d = str_to_float(prev.val); 1064 node->value.d = str_to_float(prev.val);
1023 } break; 1065 } break;
1024 case TOK_CHAR: { 1066 case TOK_CHAR: {
1025 node = node_alloc(parser, NODE_NUM_INT, prev); 1067 node = node_alloc(parser, NODE_NUM_INT, prev);
1026 if (!node) return;
1027 node->value.i = prev.val.mem[1]; 1068 node->value.i = prev.val.mem[1];
1028 } break; 1069 } break;
1029 default: break; 1070 default: break;
@@ -1040,13 +1081,46 @@ parse_string(Parser *parser) {
1040 print_token(prev); 1081 print_token(prev);
1041#endif 1082#endif
1042 Node *node = node_alloc(parser, NODE_STRING, prev); 1083 Node *node = node_alloc(parser, NODE_STRING, prev);
1043 if (!node) return;
1044 node->value.str = str_remove_prefix(prev.val, cstr("\"")); 1084 node->value.str = str_remove_prefix(prev.val, cstr("\""));
1045 node->value.str = str_remove_suffix(node->value.str, cstr("\"")); 1085 node->value.str = str_remove_suffix(node->value.str, cstr("\""));
1046 array_push(parser->nodes, node, parser->storage); 1086 array_push(parser->nodes, node, parser->storage);
1047} 1087}
1048 1088
1049void 1089void
1090parse_dot(Parser *parser) {
1091 if (parser->panic) return;
1092 Token prev = parser->previous;
1093#if DEBUG == 1
1094 print("parsing dot ");
1095 print_token(prev);
1096#endif
1097 ParseRule rule = parse_rules[prev.kind];
1098 parse_expr(parser, rule.precedence - 1);
1099 Node *node = node_alloc(parser, NODE_DOT, prev);
1100 node->binary.right = array_pop(parser->nodes);
1101 node->binary.left = array_pop(parser->nodes);
1102 node->binary.right->parent = node;
1103 node->binary.left->parent = node;
1104 if (node->binary.left->kind != NODE_DOT &&
1105 node->binary.left->kind != NODE_SYMBOL &&
1106 node->binary.left->kind != NODE_FUNCALL &&
1107 node->binary.left->kind != NODE_DEREF &&
1108 node->binary.left->kind != NODE_PTR &&
1109 node->binary.left->kind != NODE_INDEX) {
1110 parse_emit_err(parser, prev, cstr("invalid dot expression"));
1111 }
1112 if (node->binary.right->kind != NODE_DOT &&
1113 node->binary.right->kind != NODE_SYMBOL &&
1114 node->binary.right->kind != NODE_FUNCALL &&
1115 node->binary.right->kind != NODE_DEREF &&
1116 node->binary.right->kind != NODE_PTR &&
1117 node->binary.right->kind != NODE_INDEX) {
1118 parse_emit_err(parser, prev, cstr("invalid dot expression"));
1119 }
1120 array_push(parser->nodes, node, parser->storage);
1121}
1122
1123void
1050parse_symbol(Parser *parser) { 1124parse_symbol(Parser *parser) {
1051 if (parser->panic) return; 1125 if (parser->panic) return;
1052 Token prev = parser->previous; 1126 Token prev = parser->previous;
@@ -1054,27 +1128,22 @@ parse_symbol(Parser *parser) {
1054 print("parsing symbol "); 1128 print("parsing symbol ");
1055 print_token(prev); 1129 print_token(prev);
1056#endif 1130#endif
1131 // Dereference operators.
1057 if (prev.kind == TOK_AT) { 1132 if (prev.kind == TOK_AT) {
1133 // NOTE: Can we move this to the pratt table?
1134 Node *node = node_alloc(parser, NODE_PTR, parser->previous);
1058 parse_consume(parser, TOK_SYMBOL, 1135 parse_consume(parser, TOK_SYMBOL,
1059 cstr("expected symbol after '.' operator")); 1136 cstr("expected symbol after '@' operator"));
1060 parse_symbol(parser); 1137 parse_symbol(parser);
1061 Node *node = array_pop(parser->nodes); 1138 node->t.next = array_pop(parser->nodes);
1062 if (node) { 1139 node->t.next->parent = node;
1063 node->is_ptr = true; 1140 array_push(parser->nodes, node, parser->storage);
1064 array_push(parser->nodes, node, parser->storage);
1065 }
1066 return; 1141 return;
1067 } 1142 }
1143
1068 Node *node = node_alloc(parser, NODE_SYMBOL, prev); 1144 Node *node = node_alloc(parser, NODE_SYMBOL, prev);
1069 if (!node) return; 1145 node->value.sym = prev.val;
1070 if (parse_match(parser, TOK_DOT)) { 1146 if (parser->current.kind == TOK_COLON && parse_peek(parser) == TOK_LCURLY) {
1071 // Symbol chain.
1072 parse_consume(parser, TOK_SYMBOL,
1073 cstr("expected symbol after '.' operator"));
1074 parse_symbol(parser);
1075 node->sym.next = array_pop(parser->nodes);
1076 } else if (parser->current.kind == TOK_COLON &&
1077 parse_peek(parser) == TOK_LCURLY) {
1078 parse_advance(parser); 1147 parse_advance(parser);
1079 parse_advance(parser); 1148 parse_advance(parser);
1080 1149
@@ -1083,37 +1152,54 @@ parse_symbol(Parser *parser) {
1083 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) { 1152 while (!parse_match(parser, TOK_RCURLY) && !parser->panic) {
1084 parse_struct_lit_field(parser); 1153 parse_struct_lit_field(parser);
1085 Node *field = array_pop(parser->nodes); 1154 Node *field = array_pop(parser->nodes);
1155 field->parent = node;
1086 array_push(node->elements, field, parser->storage); 1156 array_push(node->elements, field, parser->storage);
1087 } 1157 }
1088 } else if (parse_match(parser, TOK_LSQUARE)) { 1158 } else if (parse_match(parser, TOK_LSQUARE)) {
1089 node->kind = NODE_SYMBOL_IDX; 1159 Node *index = node_alloc(parser, NODE_INDEX, prev);
1160 Node *start = index;
1090 parse_expr(parser, PREC_LOW); 1161 parse_expr(parser, PREC_LOW);
1091 node->sym.arr_size = array_pop(parser->nodes);
1092 parse_consume(parser, TOK_RSQUARE, 1162 parse_consume(parser, TOK_RSQUARE,
1093 cstr("unmatched brackets ']' in array type")); 1163 cstr("unmatched brackets ']' in array type"));
1094 if (parse_match(parser, TOK_DOT)) { 1164 index->idx.value = array_pop(parser->nodes);
1095 // Symbol chain. 1165 index->idx.value->parent = start;
1096 parse_consume(parser, TOK_SYMBOL, 1166 while (parse_match(parser, TOK_LSQUARE)) {
1097 cstr("expected symbol after '.' operator")); 1167 Node *next = node_alloc(parser, NODE_INDEX, prev);
1098 parse_symbol(parser); 1168 parse_expr(parser, PREC_LOW);
1099 node->sym.next = array_pop(parser->nodes); 1169 parse_consume(parser, TOK_RSQUARE,
1170 cstr("unmatched brackets ']' in array type"));
1171 next->idx.value = array_pop(parser->nodes);
1172 next->parent = index;
1173 index->idx.next = next;
1174 index = next;
1100 } 1175 }
1176
1177 index->idx.next = node;
1178 index->idx.next->parent = index;
1179 array_push(parser->nodes, start, parser->storage);
1180 return;
1101 } else if (parse_match(parser, TOK_LPAREN)) { 1181 } else if (parse_match(parser, TOK_LPAREN)) {
1102 node->kind = NODE_FUNCALL; 1182 node->kind = NODE_FUNCALL;
1103 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) { 1183 while (!parse_match(parser, TOK_RPAREN) && !parser->panic) {
1104 parse_expr(parser, PREC_LOW); 1184 parse_expr(parser, PREC_LOW);
1105 array_push(node->arguments, array_pop(parser->nodes), 1185 Node *arg = array_pop(parser->nodes);
1106 parser->storage); 1186 arg->parent = node;
1187 array_push(node->arguments, arg, parser->storage);
1107 } 1188 }
1108 if (parse_match(parser, TOK_DOT)) { 1189 } else if (parse_match(parser, TOK_AT)) {
1109 // Symbol chain. 1190 Node *deref = node_alloc(parser, NODE_DEREF, prev);
1110 parse_consume(parser, TOK_SYMBOL, 1191 Node *start = deref;
1111 cstr("expected symbol after '.' operator")); 1192 while (parse_match(parser, TOK_AT)) {
1112 parse_symbol(parser); 1193 Node *next = node_alloc(parser, NODE_DEREF, prev);
1113 node->sym.next = array_pop(parser->nodes); 1194 next->parent = deref;
1195 deref->deref.next = next;
1196 deref = next;
1114 } 1197 }
1198 deref->deref.next = node;
1199 deref->deref.next->parent = deref;
1200 array_push(parser->nodes, start, parser->storage);
1201 return;
1115 } 1202 }
1116 node->value.sym = prev.val;
1117 array_push(parser->nodes, node, parser->storage); 1203 array_push(parser->nodes, node, parser->storage);
1118} 1204}
1119 1205
@@ -1139,22 +1225,30 @@ graph_node(Node *node) {
1139 case NODE_NUM_UINT: print("| Value: %x", node->value.u); break; 1225 case NODE_NUM_UINT: print("| Value: %x", node->value.u); break;
1140 case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break; 1226 case NODE_NUM_FLOAT: print("| Value: %f{2}", node->value.d); break;
1141 case NODE_STRING: print("| Value: %s", node->value.str); break; 1227 case NODE_STRING: print("| Value: %s", node->value.str); break;
1142 case NODE_SYMBOL_IDX:
1143 case NODE_STRUCT: 1228 case NODE_STRUCT:
1144 case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break; 1229 case NODE_STRUCT_LIT: print("| Name: %s", node->value.sym); break;
1145 case NODE_SYMBOL: 1230 case NODE_SYMBOL:
1231 case NODE_DEREF:
1146 case NODE_FUNCALL: 1232 case NODE_FUNCALL:
1147 case NODE_ARR_TYPE: 1233 case NODE_DOT:
1148 case NODE_FIELD: 1234 case NODE_FIELD:
1149 case NODE_TYPE: { 1235 case NODE_TYPE: {
1150 if (node->is_ptr) { 1236 print("| Name: %s", node->value.sym);
1151 print("| Name: @%s", node->value.sym); 1237 } break;
1152 } else { 1238 case NODE_ARR: {
1153 print("| Name: %s", node->value.sym); 1239 if (node->array.kind == NODE_ARR_STATIC) {
1240 print("| Static [%d]", node->array.size->value.i);
1241 } else if (node->array.kind == NODE_ARR_SLICE) {
1242 print("| Slice []");
1243 } else if (node->array.kind == NODE_ARR_DYNAMIC) {
1244 print("| Dynamic [...]");
1154 } 1245 }
1155 } break; 1246 } break;
1156 default: break; 1247 default: break;
1157 } 1248 }
1249 if (node->unique_name.size > 0) {
1250 print("| Unique Name: %s", node->unique_name);
1251 }
1158 if (node->type.size > 0) { 1252 if (node->type.size > 0) {
1159 print("| Type: %s", node->type); 1253 print("| Type: %s", node->type);
1160 } 1254 }
@@ -1164,8 +1258,14 @@ graph_node(Node *node) {
1164 if (node->type_returns.size > 0) { 1258 if (node->type_returns.size > 0) {
1165 print("| Return: %s", node->type_returns); 1259 print("| Return: %s", node->type_returns);
1166 } 1260 }
1261 if (node->lvalue) {
1262 print("| LVALUE");
1263 }
1167 println("\"];"); 1264 println("\"];");
1168 1265
1266 if (node->parent) {
1267 println("%d:w->%d:e;", node->id, node->parent->id);
1268 }
1169 switch (node->kind) { 1269 switch (node->kind) {
1170 case NODE_FUN: { 1270 case NODE_FUN: {
1171 for (sz i = 0; i < array_size(node->func.params); i++) { 1271 for (sz i = 0; i < array_size(node->func.params); i++) {
diff --git a/src/semantic.c b/src/semantic.c
index 7c44671..c6e9ebf 100644
--- a/src/semantic.c
+++ b/src/semantic.c
@@ -8,39 +8,101 @@ typedef enum {
8 SYM_UNKNOWN, 8 SYM_UNKNOWN,
9 SYM_BUILTIN_FUN, 9 SYM_BUILTIN_FUN,
10 SYM_BUILTIN_TYPE, 10 SYM_BUILTIN_TYPE,
11 SYM_TYPE,
11 SYM_FUN, 12 SYM_FUN,
12 SYM_VAR, 13 SYM_GLOBALVAR,
13 SYM_PARAM, 14 SYM_LOCALVAR,
14 SYM_ENUM, 15 SYM_ENUM,
15 SYM_ENUM_FIELD,
16 SYM_STRUCT, 16 SYM_STRUCT,
17 SYM_STRUCT_FIELD, 17 SYM_STRUCT_FIELD,
18} SymbolKind; 18} SymbolKind;
19 19
20Str sym_kind_str[] = { 20Str sym_str[] = {
21 [SYM_UNKNOWN] = cstr("UNKNOWN "), 21 [SYM_UNKNOWN] = cstr("UNKNOWN"),
22 [SYM_BUILTIN_FUN] = cstr("BUILTIN FUN "), 22 [SYM_BUILTIN_FUN] = cstr("BUILTIN FUN"),
23 [SYM_BUILTIN_TYPE] = cstr("BUILTIN TYPE "), 23 [SYM_BUILTIN_TYPE] = cstr("BUILTIN TYPE"),
24 [SYM_FUN] = cstr("FUNCTION "), 24 [SYM_TYPE] = cstr("TYPE"),
25 [SYM_VAR] = cstr("VARIABLE "), 25 [SYM_FUN] = cstr("FUNCTION"),
26 [SYM_PARAM] = cstr("PARAMETER "), 26 [SYM_GLOBALVAR] = cstr("GLOBAL VARIABLE"),
27 [SYM_ENUM] = cstr("ENUM "), 27 [SYM_LOCALVAR] = cstr("LOCAL VARIABLE"),
28 [SYM_ENUM_FIELD] = cstr("ENUM FIELD "), 28 [SYM_ENUM] = cstr("ENUM"),
29 [SYM_STRUCT] = cstr("STRUCT "), 29 [SYM_STRUCT] = cstr("STRUCT"),
30 [SYM_STRUCT_FIELD] = cstr("STRUCT FIELD "), 30 [SYM_STRUCT_FIELD] = cstr("STRUCT FIELD"),
31}; 31};
32 32
33typedef struct Type {
34 // Unique type name.
35 Str id;
36 // Size of the type in bytes.
37 sz size;
38 // Offset from the beginning of the struct memory if it's a struct field.
39 sz offset;
40 // Size of each element in case of an array.
41 sz element_size;
42 struct StructField *fields;
43} Type;
44
45typedef struct StructField {
46 Str name;
47 Type type;
48 sz padding;
49} StructField;
50
51typedef struct Variable {
52 sz idx;
53 Str name;
54
55 // Memory size requirements.
56 sz offset;
57 sz size;
58 sz padding;
59
60 // Type information for reference.
61 Type type;
62} Variable;
63
64typedef struct Function {
65 sz index;
66
67 // A unique name for this function.
68 Str id;
69
70 // The original name of the function.
71 Str name;
72
73 // Type signature of this function.
74 Str type;
75 Str ret_type;
76
77 // Parameters/Returns.
78 Str *params;
79 Str *returns;
80 Variable *vars;
81 sz var_offset;
82 sz param_offset;
83
84 // Body.
85 Node *body;
86
87 // Some statistics on number of values.
88 sz param_arity;
89 sz return_arity;
90 sz n_vars;
91} Function;
92
33typedef struct Symbol { 93typedef struct Symbol {
94 sz scope_id;
95 Str id;
34 Str name; 96 Str name;
97 Str type;
35 SymbolKind kind; 98 SymbolKind kind;
99 union {
100 Function *fun;
101 Type t;
102 Variable var;
103 };
36} Symbol; 104} Symbol;
37 105
38typedef struct Fun {
39 Str name;
40 Str param_type;
41 Str return_type;
42} Fun;
43
44typedef struct Enum { 106typedef struct Enum {
45 Str name; 107 Str name;
46 Node *val; 108 Node *val;
@@ -52,798 +114,1106 @@ typedef struct Struct {
52 Node *val; 114 Node *val;
53} Struct; 115} Struct;
54 116
117#define WORD_SIZE 8
118
119MAPDEF(VarMap, varmap, Str, Variable, str_hash, str_eq)
55MAPDEF(SymbolMap, symmap, Str, Symbol, str_hash, str_eq) 120MAPDEF(SymbolMap, symmap, Str, Symbol, str_hash, str_eq)
56MAPDEF(FunMap, funmap, Str, Fun, str_hash, str_eq) 121MAPDEF(FunMap, funmap, Str, Function, str_hash, str_eq)
57MAPDEF(EnumMap, enummap, Str, Enum, str_hash, str_eq)
58MAPDEF(StructMap, structmap, Str, Struct, str_hash, str_eq)
59 122
60typedef struct Scope { 123typedef struct Scope {
61 sz id; 124 sz id;
62 sz depth; 125 sz depth;
63 Str name; 126 Str name;
64 SymbolMap *symbols; 127 SymbolMap *symbols;
65 FunMap *funcs;
66 EnumMap *enums;
67 StructMap *structs;
68 struct Scope *parent; 128 struct Scope *parent;
69} Scope; 129} Scope;
70 130
71typedef struct Analyzer { 131typedef struct Analyzer {
72 Arena *storage; 132 Arena *storage;
73 Str file_name; 133 Str file_name;
74 sz typescope_gen; 134
135 // Current function being analyzed.
136 Function *current_function;
137 FunMap *fun_map;
138 sz n_funcs;
139
140 // Symbol scopes.
75 Scope **scopes; 141 Scope **scopes;
142
143 // Quick search tables for base types.
76 StrSet *numeric_types; 144 StrSet *numeric_types;
77 StrSet *integer_types; 145 StrSet *integer_types;
146 StrSet *uint_types;
147 StrSet *sint_types;
148 StrSet *float_types;
149
150 // Generation indexes.
151 sz scope_gen;
152 sz fun_gen;
153
78 bool err; 154 bool err;
155 bool panic;
79} Analyzer; 156} Analyzer;
80 157
81Scope * 158Scope *
82typescope_alloc(Analyzer *a, Scope *parent) { 159scope_alloc(Analyzer *a, Scope *parent) {
83 Scope *scope = arena_calloc(sizeof(Scope), a->storage); 160 Scope *scope = arena_calloc(sizeof(Scope), a->storage);
84 scope->parent = parent; 161 scope->parent = parent;
85 if (parent != NULL) { 162 if (parent) {
86 scope->name = parent->name; 163 scope->name = parent->name;
87 } 164 }
88 scope->id = a->typescope_gen++; 165 scope->id = a->scope_gen++;
89 scope->depth = parent == NULL ? 0 : parent->depth + 1; 166 scope->depth = parent == NULL ? 0 : parent->depth + 1;
90 array_push(a->scopes, scope, a->storage); 167 array_push(a->scopes, scope, a->storage);
91 return scope; 168 return scope;
92} 169}
93 170
94SymbolMap * 171typedef struct FindSymbolResult {
95find_type(Scope *scope, Str type) { 172 SymbolMap *map;
173 Scope *scope;
174} FindSymbolResult;
175
176FindSymbolResult
177find_symbol(Scope *scope, Str sym) {
96 while (scope != NULL) { 178 while (scope != NULL) {
97 SymbolMap *val = symmap_lookup(&scope->symbols, type); 179 SymbolMap *val = symmap_lookup(&scope->symbols, sym);
98 if (val != NULL) { 180 if (val != NULL) {
99 return val; 181 return (FindSymbolResult){.map = val, .scope = scope};
100 } 182 }
101 scope = scope->parent; 183 scope = scope->parent;
102 } 184 }
103 return NULL; 185 return (FindSymbolResult){0};
104} 186}
105 187
106FunMap * 188#define SEMANTIC_ERROR(A, N, ...) \
107find_fun(Scope *scope, Str type) { 189 do { \
108 while (scope != NULL) { 190 if ((A)->panic) return cstr(""); \
109 FunMap *val = funmap_lookup(&scope->funcs, type); 191 a->err = true; \
110 if (val != NULL) { 192 a->panic = true; \
111 return val; 193 eprint("%s:%d:%d: error: ", (A)->file_name, (N)->line, (N)->col); \
112 } 194 eprintln(__VA_ARGS__); \
113 scope = scope->parent; 195 return cstr(""); \
196 } while (0)
197
198Str type_inference(Analyzer *a, Node *node, Scope *scope);
199
200Str
201unique_name(Analyzer *a, Str symbol, Scope *scope) {
202 Str id = symbol;
203 id = str_concat(id, cstr("."), a->storage);
204 id = str_concat(id, str_from_int(scope->id, a->storage), a->storage);
205 return id;
206}
207
208Str
209register_var(Analyzer *a,
210 Node *node,
211 Scope *scope,
212 Str var_name,
213 Str var_type) {
214 Str id = unique_name(a, var_name, scope);
215 var_type = str_split(&var_type, cstr("."));
216 // Check storage requirements and register variable.
217 FindSymbolResult sym = find_symbol(scope, var_type);
218 if (!sym.map) {
219 SEMANTIC_ERROR(a, node, "couldn't register var %s: %s", var_name,
220 var_type);
114 } 221 }
115 return NULL; 222 Type type = sym.map->val.t;
223 sz padding = -type.size & (WORD_SIZE - 1);
224 sz size = type.size + padding;
225 sz offset = a->current_function->var_offset;
226 Variable var = {
227 .idx = a->current_function->n_vars++,
228 .name = id,
229 .size = size,
230 .padding = padding,
231 .offset = offset,
232 .type = type,
233 };
234 a->current_function->var_offset += size;
235 array_push(a->current_function->vars, var, a->storage);
236
237 // Register symbol on symbol table.
238 SymbolKind sym_kind =
239 a->current_function->index == 0 ? SYM_GLOBALVAR : SYM_LOCALVAR;
240 symmap_insert(&scope->symbols, var_name,
241 (Symbol){
242 .scope_id = scope->id,
243 .id = id,
244 .name = var_name,
245 .type = sym.map->val.id,
246 .kind = sym_kind,
247 .var = var,
248 },
249 a->storage);
250
251 // Register all struct fields.
252 // if (sym.map->val.kind == SYM_STRUCT) {
253 // for (sz i = 0; i < array_size(type.fields); i++) {
254 // Str suffix = str_concat(cstr("."), type.fields[i].name,
255 // a->storage); Str field_name = str_concat(var_name, suffix,
256 // a->storage); Str field_id = str_concat(id, suffix, a->storage);
257 // sz field_size = type.fields[i].type.size;
258 // offset += type.fields[i].padding;
259 // Variable var = {
260 // .idx = a->current_function->n_vars++,
261 // .name = field_id,
262 // .size = field_size,
263 // .offset = offset,
264 // .type = type.fields[i].type,
265 // };
266 // offset += field_size;
267 // array_push(a->current_function->vars, var, a->storage);
268 // symmap_insert(&scope->symbols, field_name,
269 // (Symbol){
270 // .scope_id = scope->id,
271 // .id = field_id,
272 // .name = field_name,
273 // .type = type.fields[i].type.id,
274 // .kind = sym_kind,
275 // .var = var,
276 // },
277 // a->storage);
278 // }
279 // }
280 return id;
116} 281}
117 282
118typedef struct FindEnumResult { 283Str
119 EnumMap *map; 284analyze_let(Analyzer *a, Node *node, Scope *scope) {
120 Scope *scope; 285 node->type = cstr("nil");
121} FindEnumResult; 286 Str symbol = node->var.name->value.str;
287 node->var.name->lvalue = true;
288 if (symmap_lookup(&scope->symbols, symbol)) {
289 SEMANTIC_ERROR(a, node, "symbol '%s' already exists in current scope",
290 symbol);
291 }
122 292
123FindEnumResult 293 // Check that the name it's not a builtin.
124find_enum(Scope *scope, Str type) { 294 FindSymbolResult res = find_symbol(scope, symbol);
125 while (scope != NULL) { 295 if (res.map) {
126 EnumMap *val = enummap_lookup(&scope->enums, type); 296 SymbolKind kind = res.map->val.kind;
127 if (val != NULL) { 297 if (kind == SYM_BUILTIN_TYPE) {
128 return (FindEnumResult){.map = val, .scope = scope}; 298 SEMANTIC_ERROR(a, node, "can't override builtin type '%s'", symbol);
299 }
300 if (kind == SYM_BUILTIN_FUN) {
301 SEMANTIC_ERROR(a, node, "can't override builtin function '%s'",
302 symbol);
129 } 303 }
130 scope = scope->parent;
131 } 304 }
132 return (FindEnumResult){0};
133}
134 305
135typedef struct FindStructResult { 306 Str var_type = cstr("");
136 StructMap *map; 307 if (node->var.type) {
137 Scope *scope; 308 var_type = type_inference(a, node->var.type, scope);
138} FindStructResult; 309 if (!var_type.size || str_eq(var_type, cstr("nil"))) {
310 SEMANTIC_ERROR(a, node, "can't declare a variable of type `nil`",
311 symbol);
312 }
313 }
139 314
140FindStructResult 315 Str var_val = cstr("");
141find_struct(Scope *scope, Str type) { 316 if (node->var.val) {
142 while (scope != NULL) { 317 var_val = type_inference(a, node->var.val, scope);
143 StructMap *val = structmap_lookup(&scope->structs, type); 318 if (!var_val.size || str_eq(var_val, cstr("nil"))) {
144 if (val != NULL) { 319 SEMANTIC_ERROR(a, node, "can't bind `nil` to variable '%s'",
145 return (FindStructResult){.map = val, .scope = scope}; 320 symbol);
146 } 321 }
147 scope = scope->parent;
148 } 322 }
149 return (FindStructResult){0};
150}
151 323
152typedef struct FindSymbolResult { 324 if (!var_type.size) {
153 SymbolMap *map; 325 var_type = var_val;
154 Scope *scope; 326 } else if (!var_val.size) {
155} FindSymbolResult; 327 var_val = var_type;
328 }
156 329
157FindSymbolResult 330 if (!str_eq(var_type, var_val)) {
158find_symbol(Scope *scope, Str type) { 331 bool int_types = strset_lookup(&a->integer_types, var_type) &&
159 while (scope != NULL) { 332 strset_lookup(&a->integer_types, var_val);
160 SymbolMap *val = symmap_lookup(&scope->symbols, type); 333 bool float_types = strset_lookup(&a->float_types, var_type) &&
161 if (val != NULL) { 334 strset_lookup(&a->float_types, var_val);
162 return (FindSymbolResult){.map = val, .scope = scope}; 335 bool ptr_types = str_has_prefix(var_type, cstr("@")) &&
336 str_has_prefix(var_val, cstr("@"));
337 if (!int_types && !float_types && !ptr_types) {
338 SEMANTIC_ERROR(a, node,
339 "type mismatch, trying to assign %s to a "
340 "variable of type %s",
341 var_val, var_type);
163 } 342 }
164 scope = scope->parent;
165 } 343 }
166 return (FindSymbolResult){0}; 344
345 Str id = register_var(a, node, scope, symbol, var_type);
346 node->var.name->unique_name = id;
347 node->var.name->type = var_type;
348 return node->type;
167} 349}
168 350
169void 351Str
170graph_typescope(Scope *scope, Arena a) { 352analyze_type(Analyzer *a, Node *node, Scope *scope) {
171 if (!scope->symbols) { 353 Str node_type = node->value.str;
172 return; 354 FindSymbolResult sym = find_symbol(scope, node_type);
173 } 355 if (!sym.map) {
174 SymbolMapIter iter = symmap_iterator(scope->symbols, &a); 356 SEMANTIC_ERROR(a, node, "unknown type name: '%s'", node_type);
175 SymbolMap *type = symmap_next(&iter, &a); 357 } else if (sym.map->val.kind != SYM_BUILTIN_TYPE &&
176 print( 358 sym.map->val.kind != SYM_TYPE &&
177 "%d[shape=\"none\" label=<<TABLE ALIGN=\"left\" STYLE=\"rounded\" " 359 sym.map->val.kind != SYM_STRUCT) {
178 "BORDER=\"1\" CELLBORDER=\"0\" CELLSPACING=\"0\" CELLPADDING=\"6\" " 360 SEMANTIC_ERROR(a, node, "'%s' is not a valid type name", node_type);
179 "COLUMNS=\"*\">",
180 scope->id);
181 print(
182 "<TR >"
183 "<TD ALIGN=\"left\" > NAME </TD>"
184 "<TD ALIGN=\"left\" > TYPE </TD>"
185 "</TR>");
186 while (type) {
187 print(
188 "<TR>"
189 "<TD ALIGN=\"left\"> %s </TD>"
190 "<TD ALIGN=\"left\"> %s</TD>"
191 "</TR>",
192 type->key, type->val.name);
193 type = symmap_next(&iter, &a);
194 } 361 }
195 println("</TABLE>>];");
196 362
197 sz this_id = scope->id; 363 if (node->t.next) {
198 while (scope->parent) { 364 // If this is a compound type we need to register all the intermediate
199 if (scope->parent->symbols) { 365 // types on the symbols table. For example in this deranged example:
200 println("%d:e->%d:w;", this_id, scope->parent->id); 366 //
201 break; 367 // let insane_type: @[8][4]@[2]U16
202 } else { 368 //
203 scope = scope->parent; 369 // Registered types:
370 // --------------------------------------
371 // name size
372 // --------------------------------------
373 // U16 2
374 // [2]U16 2 * 2 = 4
375 // @[2]U16 sizeof(Ptr) = WORD_SIZE
376 // [4]@[2]U16 4 * WORD_SIZE = 32
377 // [8][4]@[2]U16 8 * 32 = 256
378 // @[8][4]@[2]U16 sizeof(Ptr) = WORD_SIZE
379 // --------------------------------------
380 //
381 Node *next = node;
382 // Find last node in the chain.
383 sz n = 0;
384 while (next->t.next) {
385 n++;
386 next = next->t.next;
387 }
388
389 // Register type.
390 Str prefix = cstr("");
391 sz size_multiplier = 1;
392 sz base_size = sym.map->val.t.size;
393 for (sz i = 0; i < n; i++) {
394 switch (next->kind) {
395 case NODE_PTR: {
396 base_size = WORD_SIZE;
397 size_multiplier = 1;
398 prefix = str_concat(cstr("@"), prefix, a->storage);
399 } break;
400 case NODE_ARR: {
401 switch (next->array.kind) {
402 case NODE_ARR_STATIC: {
403 sz idx = next->array.size->value.i;
404 Str val = str_from_int(idx, a->storage);
405 prefix = str_concat(cstr("]"), prefix, a->storage);
406 prefix = str_concat(val, prefix, a->storage);
407 prefix = str_concat(cstr("["), prefix, a->storage);
408 size_multiplier *= idx;
409 } break;
410 case NODE_ARR_SLICE: {
411 base_size = WORD_SIZE * 2;
412 size_multiplier = 1;
413 prefix = str_concat(cstr("[]"), prefix, a->storage);
414 } break;
415 case NODE_ARR_DYNAMIC: {
416 base_size = WORD_SIZE * 3;
417 size_multiplier = 1;
418 prefix =
419 str_concat(cstr("[...]"), prefix, a->storage);
420 } break;
421 default: {
422 SEMANTIC_ERROR(a, node, "unimplemented");
423 } break;
424 }
425 } break;
426 default: {
427 SEMANTIC_ERROR(a, node, "unreachable");
428 } break;
429 }
430 Str type_name = str_concat(prefix, node_type, a->storage);
431 Str type_id = str_concat(prefix, sym.map->val.t.id, a->storage);
432 FindSymbolResult sym = find_symbol(scope, type_name);
433 if (!sym.map || !str_eq(sym.map->val.t.id, type_id)) {
434 Type type = {
435 .id = type_id,
436 .size = base_size * size_multiplier,
437 .element_size = base_size,
438 };
439 symmap_insert(&scope->symbols, type_name,
440 (Symbol){
441 .scope_id = scope->id,
442 .id = type_id,
443 .name = type_name,
444 .type = cstr(":T"),
445 .kind = SYM_TYPE,
446 .t = type,
447 },
448 a->storage);
449 }
450 node->type = type_id;
451 next->type = node->type;
452 next = next->parent;
204 } 453 }
454 } else {
455 node->type = sym.map->val.t.id;
205 } 456 }
457
458 return node->type;
206} 459}
207 460
208void 461Str
209graph_functions(Scope *scope, Arena a) { 462register_struct(Analyzer *a, Node *node, Scope *scope) {
210 if (!scope->funcs) { 463 Str symbol = node->value.str;
211 return; 464 FindSymbolResult sym = find_symbol(scope, symbol);
212 } 465 if (sym.map && scope->id == sym.scope->id) {
213 FunMapIter iter = funmap_iterator(scope->funcs, &a); 466 SEMANTIC_ERROR(a, node, "symbol '%s' already defined", symbol);
214 FunMap *func = funmap_next(&iter, &a);
215 print(
216 "fun_%d[shape=\"none\" label=<<TABLE ALIGN=\"left\" STYLE=\"rounded\" "
217 "BORDER=\"1\" CELLBORDER=\"0\" CELLSPACING=\"0\" CELLPADDING=\"6\" "
218 "COLUMNS=\"*\">",
219 scope->id);
220 print(
221 "<TR >"
222 "<TD ALIGN=\"left\" > NAME </TD>"
223 "<TD ALIGN=\"left\" > PARAMS </TD>"
224 "<TD ALIGN=\"left\" > RETURN </TD>"
225 "</TR>");
226 while (func) {
227 print(
228 "<TR>"
229 "<TD PORT=\"%s\" ALIGN=\"left\" > %s </TD>"
230 "<TD ALIGN=\"left\" > %s </TD>"
231 "<TD ALIGN=\"left\" > %s </TD>"
232 "</TR>",
233 func->val.name, func->val.name, func->val.param_type,
234 func->val.return_type);
235 func = funmap_next(&iter, &a);
236 } 467 }
237 println("</TABLE>>];"); 468 Str id = unique_name(a, symbol, scope);
238 sz this_id = scope->id; 469
239 while (scope->parent) { 470 // Pre-registering the struct will allow us for self-referencing, to be
240 if (scope->parent->symbols) { 471 // updated when we finish when field and sizing information.
241 println("fun_%d:e->fun_%d:%s:w;", this_id, scope->parent->id, 472 Type type = {
242 scope->name); 473 .id = id,
243 break; 474 .size = 0,
244 } else { 475 };
245 scope = scope->parent; 476 SymbolMap *map = symmap_insert(&scope->symbols, symbol,
477 (Symbol){
478 .scope_id = scope->id,
479 .id = id,
480 .name = symbol,
481 .type = cstr(":T"),
482 .kind = SYM_STRUCT,
483 .t = type,
484 },
485 a->storage);
486
487 sz size = 0;
488 sz largest = 0;
489 StructField *fields = NULL;
490 for (sz i = 0; i < array_size(node->struct_field); i++) {
491 Node *field = node->struct_field[i];
492 Str field_name = field->value.str;
493 Str field_type = analyze_type(a, field->field.type, scope);
494 Str field_type_name = field_type;
495 field_type_name = str_split(&field_type_name, cstr("."));
496 if (str_eq(field_type_name, type.id)) {
497 SEMANTIC_ERROR(a, node, "recursive struct definition on field '%s'",
498 field_name);
499 }
500 FindSymbolResult sym = find_symbol(scope, field_type_name);
501 if (!sym.map) {
502 SEMANTIC_ERROR(a, node, "unknown type '%s'", field_type_name);
503 }
504 sz field_size = sym.map->val.t.size;
505 sz element_size = sym.map->val.t.element_size;
506 // When we have multiple structs nested or arrays
507 // inside structs we need to get the *maximum* size of the inner fields,
508 // so for example:
509 // struct foo {
510 // u8 c;
511 // struct bar {
512 // u8 *p;
513 // u16 x;
514 // } inner;
515 // };
516 // because inner has a char* (8 bytes in 64bit), that's the
517 // value we should use. Another example
518 // struct foo {
519 // u8 c;
520 // struct bar {
521 // u8 p;
522 // u16 x;
523 // } inner;
524 // };
525 // in this case the maximum value inside is a U16 (2 bytes). We
526 // thus need to perform a recursive analysis and find in the
527 // symbol table the value for bar.x. We may want to store this
528 // info as we go.
529 largest = MAX(element_size, largest);
530
531 // Make sure this field is aligned, inserting padding if
532 // necessary.
533 sz padding = -(size + element_size) & (element_size - 1);
534 sz offset = size + padding;
535 size += field_size + padding;
536
537 // Linear search types array to make sure we don't have the
538 // element already inserted.
539 for (sz i = 0; i < array_size(fields); i++) {
540 if (str_eq(field_name, fields[i].name)) {
541 SEMANTIC_ERROR(a, node, "struct field '%s' already exists",
542 field_name);
543 }
246 } 544 }
545
546 // Register into types array
547 Type t = {
548 .id = field_type,
549 .size = field_size,
550 .offset = offset,
551 .element_size = element_size,
552 };
553 StructField f = {
554 .name = field_name,
555 .type = t,
556 .padding = padding,
557 };
558 array_push(fields, f, a->storage);
559
560 field_name = str_concat(cstr("."), field_name, a->storage);
561 field_name = str_concat(id, field_name, a->storage);
562 symmap_insert(&scope->symbols, field_name,
563 (Symbol){
564 .scope_id = scope->id,
565 .id = field_name,
566 .name = field_name,
567 .type = t.id,
568 .kind = SYM_STRUCT_FIELD,
569 .t = t,
570 },
571 a->storage);
247 } 572 }
573 // Padding at the end of the struct is determined by the largest
574 // element it contains.
575 sz padding = -size & (CLAMP(largest, 0, WORD_SIZE) - 1);
576 size += padding;
577
578 // Update struct value information.
579 map->val.t.size = size;
580 map->val.t.element_size = largest;
581 map->val.t.fields = fields;
582
583 if (fields == NULL) {
584 SEMANTIC_ERROR(a, node, "struct '%s' is empty", symbol);
585 }
586
587 node->unique_name = id; // TODO: need to assign unique names to these.
588 node->type = cstr("nil");
589 return node->type;
248} 590}
249 591
250void 592Str
251graph_types(Scope **scopes, Arena a) { 593analyze_dot(Analyzer *a, Node *node, Scope *scope) {
252 if (scopes == NULL) return; 594 Str left = type_inference(a, node->binary.left, scope);
253 println("digraph types {"); 595 left = str_remove_prefix(left, cstr("@"));
254 println("rankdir=LR;"); 596 node->binary.left->lvalue = true;
255 println("ranksep=\"0.95 equally\";"); 597 if (node->binary.right->kind == NODE_SYMBOL) {
256 println("nodesep=\"0.5 equally\";"); 598 Str rname = node->binary.right->value.str;
257 println("overlap=scale;"); 599 Str full_name = str_concat(left, cstr("."), a->storage);
258 println("bgcolor=\"transparent\";"); 600 full_name = str_concat(full_name, rname, a->storage);
259 for (sz i = 0; i < array_size(scopes); i++) { 601 node->binary.right->value.str = full_name;
260 Scope *scope = scopes[i]; 602 } else if (node->binary.right->kind == NODE_INDEX) {
261 if (!scope) { 603 Str rname = node->binary.right->idx.next->value.str;
262 continue; 604 Str full_name = str_concat(left, cstr("."), a->storage);
263 } 605 full_name = str_concat(full_name, rname, a->storage);
264 println("subgraph %d {", i); 606 node->binary.right->idx.next->lvalue = node->lvalue;
265 graph_typescope(scope, a); 607 node->binary.right->idx.next->value.str = full_name;
266 graph_functions(scope, a); 608 } else if (node->binary.right->kind == NODE_DOT) {
267 println("}"); 609 Str rname = node->binary.right->binary.left->value.str;
610 Str full_name = str_concat(left, cstr("."), a->storage);
611 full_name = str_concat(full_name, rname, a->storage);
612 node->binary.right->binary.left->lvalue = node->lvalue;
613 node->binary.right->binary.left->value.str = full_name;
614 } else {
615 SEMANTIC_ERROR(a, node, "unimplemented");
268 } 616 }
269 println("}"); 617 Str right = type_inference(a, node->binary.right, scope);
618 node->type = right;
619
620 // TODO: Expand to work with methods and nested structs.
621 // TODO: Uniform Function Call Syntax (UFCS)
622 // - If the struct has a member, choose that, if not, try to find
623 // a function with the same name in the current environment and
624 // typecheck that the first parameter it takes is the name.
625 return node->type;
270} 626}
271 627
272void 628Str
273emit_semantic_error(Analyzer *a, Node *n, Str msg) { 629analyze_symbol(Analyzer *a, Node *node, Scope *scope) {
274 eprintln("%s:%d:%d: error: %s", a->file_name, n->line, n->col, msg); 630 Str symbol = node->value.str;
275 a->err = true; 631 // Is a valid symbol.
632 FindSymbolResult sym = find_symbol(scope, symbol);
633 if (!sym.map) {
634 SEMANTIC_ERROR(a, node, "couldn't resolve symbol '%s'", symbol);
635 } else if (sym.map->val.kind == SYM_BUILTIN_TYPE ||
636 sym.map->val.kind == SYM_TYPE) {
637 SEMANTIC_ERROR(a, node, "'%s' is not a valid symbol name", symbol);
638 }
639
640 // We disallow capturing local variables from nested functions.
641 if (!str_eq(sym.scope->name, scope->name) && sym.scope->name.size) {
642 SEMANTIC_ERROR(a, node, "can't capture external local symbol '%s'",
643 symbol);
644 }
645 node->unique_name = sym.map->val.id;
646 node->type = sym.map->val.type;
647 return node->type;
276} 648}
277 649
278Str type_inference(Analyzer *a, Node *node, Scope *scope); 650Str
651analyze_set(Analyzer *a, Node *node, Scope *scope) {
652 // In case of a chain, setup all lvalues before type inference.
653 node->var.name->lvalue = true;
654 if (node->var.name->kind == NODE_DOT) {
655 Node *next = node->var.name;
656 while (next) {
657 next->lvalue = true;
658 next = next->binary.right;
659 }
660 }
279 661
280void 662 Str var_name = type_inference(a, node->var.name, scope);
281typecheck_field(Analyzer *a, Node *node, Scope *scope, Str symbol) { 663 Str var_val = type_inference(a, node->var.val, scope);
282 if (node->field.type->kind == NODE_COMPOUND_TYPE) { 664 if (!str_eq(var_name, var_val)) {
283 Str field_name = str_concat(symbol, cstr("."), a->storage); 665 if (str_has_prefix(var_name, cstr("@"))) {
284 field_name = str_concat(field_name, node->value.str, a->storage); 666 var_name = cstr("Ptr");
285 if (structmap_lookup(&scope->structs, field_name)) {
286 eprintln("%s:%d:%d: error: struct field '%s' already exists",
287 a->file_name, node->line, node->col, field_name);
288 a->err = true;
289 } 667 }
290 Str type = cstr("\\{ "); 668 if (str_has_prefix(var_val, cstr("@"))) {
291 for (sz i = 0; i < array_size(node->field.type->elements); i++) { 669 var_val = cstr("Ptr");
292 Node *field = node->field.type->elements[i];
293 typecheck_field(a, field, scope, field_name);
294 type = str_concat(type, field->type, a->storage);
295 type = str_concat(type, cstr(" "), a->storage);
296 } 670 }
297 type = str_concat(type, cstr("\\}"), a->storage); 671 bool int_types = strset_lookup(&a->integer_types, var_name) &&
298 node->type = type; 672 strset_lookup(&a->integer_types, var_val);
299 } else { 673 bool float_types = strset_lookup(&a->float_types, var_name) &&
300 Str field_name = str_concat(symbol, cstr("."), a->storage); 674 strset_lookup(&a->float_types, var_val);
301 field_name = str_concat(field_name, node->value.str, a->storage); 675 if (!int_types && !float_types) {
302 Str field_type = node->field.type->value.str; 676 SEMANTIC_ERROR(
303 if (!find_type(scope, field_type)) { 677 a, node,
304 eprintln("%s:%d:%d: error: unknown type '%s'", a->file_name, 678 "type mismatch, trying to assign %s to variable of type %s",
305 node->field.type->line, node->field.type->col, field_type); 679 var_val, var_name);
306 a->err = true;
307 } 680 }
308 if (node->field.type->is_ptr) { 681 }
309 field_type = str_concat(cstr("@"), field_type, a->storage); 682
683 node->type = cstr("nil");
684 return node->type;
685}
686
687Str
688analyze_deref(Analyzer *a, Node *node, Scope *scope) {
689 Node *next = node->deref.next;
690 Str amount = cstr("@");
691 while (next) {
692 if (next->kind == NODE_SYMBOL) {
693 break;
310 } 694 }
311 if (node->field.type->kind == NODE_ARR_TYPE) { 695 next = next->deref.next;
312 field_type = str_concat(cstr("@"), field_type, a->storage); 696 amount = str_concat(cstr("@"), amount, a->storage);
697 }
698 Str symbol = next->value.str;
699 Str type = type_inference(a, next, scope);
700 if (!str_has_prefix(type, amount)) {
701 Str s = str_concat(symbol, amount, a->storage);
702 SEMANTIC_ERROR(a, node, "invalid type dereference %s from type %s", s,
703 type);
704 }
705 type = str_remove_prefix(type, amount);
706 node->value.str = symbol;
707 node->unique_name = next->unique_name;
708 node->type = type;
709 return node->type;
710}
711
712Str
713analyze_ifelse(Analyzer *a, Node *node, Scope *scope) {
714 Str cond_type = type_inference(a, node->ifelse.cond, scope);
715 if (!str_eq(cond_type, cstr("Bool"))) {
716 SEMANTIC_ERROR(a, node, "non-boolean expression on if condition");
717 }
718 Str if_type;
719 if (node->ifelse.expr_true->kind == NODE_BLOCK) {
720 if_type = type_inference(a, node->ifelse.expr_true, scope);
721 } else {
722 Scope *next = scope_alloc(a, scope);
723 if_type = type_inference(a, node->ifelse.expr_true, next);
724 }
725 if (str_has_prefix(if_type, cstr("ret:")) ||
726 str_has_prefix(if_type, cstr("flow:"))) {
727 if_type = cstr("nil");
728 }
729 node->type = if_type;
730 if (node->ifelse.expr_else) {
731 Str else_type;
732 if (node->ifelse.expr_else->kind == NODE_BLOCK) {
733 else_type = type_inference(a, node->ifelse.expr_else, scope);
734 } else {
735 Scope *next = scope_alloc(a, scope);
736 else_type = type_inference(a, node->ifelse.expr_else, next);
313 } 737 }
314 if (structmap_lookup(&scope->structs, field_name)) { 738 if (str_has_prefix(else_type, cstr("ret:")) ||
315 eprintln("%s:%d:%d: error: struct field '%s' already exists", 739 str_has_prefix(else_type, cstr("flow:"))) {
316 a->file_name, node->line, node->col, field_name); 740 else_type = cstr("nil");
317 a->err = true;
318 } 741 }
319 if (node->field.val) { 742 if (!str_eq(if_type, else_type)) {
320 Str type = type_inference(a, node->field.val, scope); 743 bool int_types = strset_lookup(&a->integer_types, if_type) &&
321 if (!str_eq(type, field_type)) { 744 strset_lookup(&a->integer_types, else_type);
322 eprintln( 745 bool float_types = strset_lookup(&a->float_types, if_type) &&
323 "%s:%d:%d: error: mismatched types in struct " 746 strset_lookup(&a->float_types, else_type);
324 "value " 747 if (!int_types && !float_types) {
325 "for '%s': %s expected %s", 748 SEMANTIC_ERROR(a, node,
326 a->file_name, node->line, node->col, field_name, type, 749 "mismatched types on if/else branches: %s / %s",
327 field_type); 750 if_type, else_type);
328 a->err = true;
329 } 751 }
330 } 752 }
331 structmap_insert(&scope->structs, field_name,
332 (Struct){
333 .name = field_name,
334 .type = field_type,
335 .val = node->field.val,
336 },
337 a->storage);
338 symmap_insert(&scope->symbols, field_name,
339 (Symbol){.name = field_type, .kind = SYM_STRUCT_FIELD},
340 a->storage);
341 node->type = field_type;
342 } 753 }
754
755 // If it returns a value, verify it contains an `else` statement.
756 if (!str_eq(if_type, cstr("nil"))) {
757 if (!node->ifelse.expr_else) {
758 SEMANTIC_ERROR(a, node,
759 "missing else statement in valued if expression");
760 }
761 }
762 return node->type;
343} 763}
344 764
345void 765Str
346typecheck_lit_field(Analyzer *a, Node *node, Scope *scope, Str symbol) { 766analyze_while(Analyzer *a, Node *node, Scope *scope) {
347 if (node->field.val->kind == NODE_COMPOUND_TYPE) { 767 Str cond_type = type_inference(a, node->loop.cond, scope);
348 Str type = cstr("\\{ "); 768 if (!str_eq(cond_type, cstr("Bool"))) {
349 for (sz i = 0; i < array_size(node->field.val->elements); i++) { 769 SEMANTIC_ERROR(a, node, "non-boolean expression on loop condition");
350 Node *field = node->field.val->elements[i]; 770 }
351 Str field_name = str_concat(symbol, cstr("."), a->storage); 771 if (node->loop.expr->kind != NODE_BLOCK) {
352 field_name = str_concat(field_name, field->value.str, a->storage); 772 scope = scope_alloc(a, scope);
353 typecheck_lit_field(a, field, scope, field_name); 773 }
354 type = str_concat(type, field->type, a->storage); 774 type_inference(a, node->loop.expr, scope);
355 type = str_concat(type, cstr(" "), a->storage); 775 node->type = cstr("nil");
776 return node->type;
777}
778
779Str
780analyze_cond(Analyzer *a, Node *node, Scope *scope) {
781 Str previous = cstr("");
782 bool has_else = false;
783 bool err = false;
784 for (sz i = 0; i < array_size(node->match.cases); i++) {
785 Node *expr = node->match.cases[i];
786 Str next = type_inference(a, expr, scope);
787 if (str_has_prefix(next, cstr("ret:"))) {
788 next = cstr("nil");
356 } 789 }
357 type = str_concat(type, cstr("\\}"), a->storage); 790 if (str_has_prefix(next, cstr("flow:"))) {
358 node->type = type; 791 next = cstr("nil");
359 } else { 792 }
360 StructMap *s = structmap_lookup(&scope->structs, symbol); 793 if (i != 0 && !str_eq(next, previous)) {
361 if (!s) { 794 err = true;
362 eprintln("%s:%d:%d: error: unknown struct field '%s'", a->file_name,
363 node->line, node->col, symbol);
364 a->err = true;
365 return;
366 } 795 }
367 Str field_type = s->val.type; 796 if (!expr->case_entry.cond) {
368 Str type = type_inference(a, node->field.val, scope); 797 has_else = true;
369 if (!str_eq(type, field_type)) { 798 }
370 eprintln( 799 previous = next;
371 "%s:%d:%d: error: mismatched types in struct " 800 }
372 "value " 801 if (err) {
373 "for '%s': %s expected %s", 802 SEMANTIC_ERROR(a, node, "non-matching types on cond expressions");
374 a->file_name, node->line, node->col, symbol, type, field_type); 803 }
375 a->err = true; 804 node->type = previous;
805
806 // If it returns a value, verify it contains an `else` statement.
807 if (!str_eq(node->type, cstr("nil")) &&
808 !str_has_prefix(node->type, cstr("ret:")) &&
809 !str_has_prefix(node->type, cstr("flow:"))) {
810 if (!has_else) {
811 SEMANTIC_ERROR(a, node,
812 "missing else statement in valued cond expression");
376 } 813 }
377 node->type = field_type;
378 } 814 }
815 return node->type;
379} 816}
380 817
381void 818Str
382typecheck_returns(Analyzer *a, Node *node, Str expected) { 819analyze_funcall(Analyzer *a, Node *node, Scope *scope) {
383 if (!node) { 820 Str name = node->value.str;
384 return; 821 FindSymbolResult sym = find_symbol(scope, name);
822 if (!sym.map) {
823 SEMANTIC_ERROR(a, node, "couldn't resolve function name '%s'", name);
824 } else if (sym.map->val.kind != SYM_BUILTIN_FUN &&
825 sym.map->val.kind != SYM_FUN) {
826 SEMANTIC_ERROR(a, node, "'%s' is not a valid function name", name);
385 } 827 }
386 // Traverse the tree again. 828 Function *fun = sym.map->val.fun;
387 switch (node->kind) { 829 node->unique_name = fun->id;
388 case NODE_COND: 830
389 case NODE_MATCH: { 831 // Handle builtins specially.
390 for (sz i = 0; i < array_size(node->match.cases); i++) { 832 if (sym.map->val.kind == SYM_BUILTIN_FUN) {
391 Node *next = node->match.cases[i]; 833 if (str_has_prefix(name, cstr("print"))) {
392 typecheck_returns(a, next, expected); 834 if (array_size(node->elements) == 0) {
835 SEMANTIC_ERROR(a, node,
836 "wrong number of parameters for funcall: %s "
837 "expected at lest 1",
838 name);
393 } 839 }
394 } break; 840 for (sz i = 0; i < array_size(node->elements); i++) {
395 case NODE_RETURN: { 841 Node *expr = node->elements[i];
396 Str type = str_remove_prefix(node->type, cstr("ret:")); 842 type_inference(a, expr, scope);
397 bool err = !str_eq(type, expected); 843 }
398 if (err) { 844 } else if (str_eq(name, cstr("sizeof"))) {
399 eprintln( 845 sz arity_fun = fun->param_arity;
400 "%s:%d:%d: error: mismatched return type %s, expected %s", 846 sz arity_call = array_size(node->elements);
401 a->file_name, node->line, node->col, type, expected); 847 if (arity_fun != arity_call) {
848 SEMANTIC_ERROR(a, node,
849 "wrong number of parameters for %s "
850 "expected %d got %d",
851 name, arity_fun, arity_call);
402 a->err = true; 852 a->err = true;
853 return cstr("");
403 } 854 }
404 } break; 855 Node *expr = node->elements[0];
405 case NODE_BLOCK: { 856 if (expr->kind != NODE_SYMBOL) {
406 for (sz i = 0; i < array_size(node->elements); i++) { 857 SEMANTIC_ERROR(a, node, "expected symbol name argument for %s ",
407 Node *next = node->elements[i]; 858 name);
408 typecheck_returns(a, next, expected);
409 } 859 }
410 } break; 860 Str symbol = expr->value.str;
411 case NODE_IF: { 861 FindSymbolResult sym = find_symbol(scope, symbol);
412 if (node->ifelse.expr_true) { 862 if (!sym.map) {
413 typecheck_returns(a, node->ifelse.expr_true, expected); 863 SEMANTIC_ERROR(a, node, "couldn't resolve symbol '%s'", symbol);
414 } 864 }
415 if (node->ifelse.expr_else) { 865 expr->unique_name = unique_name(a, symbol, sym.scope);
416 typecheck_returns(a, node->ifelse.expr_else, expected); 866 node->type = sym.map->val.type;
867 }
868
869 node->type = fun->ret_type;
870 return node->type;
871 }
872
873 sz arity_fun = fun->param_arity;
874 sz arity_call = array_size(node->elements);
875 if (arity_fun != arity_call) {
876 SEMANTIC_ERROR(
877 a, node,
878 "wrong number of parameters for funcall: %s expected %d got %d",
879 name, arity_fun, arity_call);
880 a->err = true;
881 return cstr("");
882 }
883
884 for (sz i = 0; i < array_size(node->elements); i++) {
885 Node *expr = node->elements[i];
886 Str param = type_inference(a, expr, scope);
887 Str expected = fun->params[i];
888 FindSymbolResult sym = find_symbol(scope, expected);
889 if (!sym.map) {
890 SEMANTIC_ERROR(a, node, "unexpected parameter type: %s", expected);
891 }
892 expected = sym.map->val.t.id;
893 if (!str_eq(param, expected)) {
894 bool int_types = strset_lookup(&a->integer_types, param) &&
895 strset_lookup(&a->integer_types, expected);
896 bool float_types = strset_lookup(&a->float_types, param) &&
897 strset_lookup(&a->float_types, expected);
898 if (!int_types && !float_types) {
899 SEMANTIC_ERROR(a, node,
900 "parameter type mismatch %s expected %s", param,
901 expected);
417 } 902 }
418 } break; 903 }
419 case NODE_SET: 904 }
420 case NODE_LET: { 905
421 if (node->var.val) { 906 node->type = fun->ret_type;
422 typecheck_returns(a, node->var.val, expected); 907 return node->type;
908}
909
910Str
911declare_fun(Analyzer *a, Node *node, Scope *scope) {
912 // Build the unique id.
913 Str symbol = node->func.name->value.str;
914 Str id = unique_name(a, symbol, scope);
915
916 if (symmap_lookup(&scope->symbols, symbol)) {
917 SEMANTIC_ERROR(
918 a, node, "function '%s' already declared in current scope", symbol);
919 }
920 {
921 FindSymbolResult res = find_symbol(scope, symbol);
922 if (res.map) {
923 SymbolKind kind = res.map->val.kind;
924 if (kind == SYM_BUILTIN_TYPE) {
925 SEMANTIC_ERROR(a, node, "can't override builtin type '%s'",
926 symbol);
927 }
928 if (kind == SYM_BUILTIN_FUN) {
929 SEMANTIC_ERROR(a, node, "can't override builtin function '%s'",
930 symbol);
423 } 931 }
424 } break; 932 }
425 case NODE_ADD: 933 }
426 case NODE_SUB: 934
427 case NODE_DIV: 935 Function fun = {
428 case NODE_MUL: 936 .index = a->fun_gen++,
429 case NODE_MOD: 937 .id = id,
430 case NODE_NOT: 938 .body = node->func.body,
431 case NODE_AND: 939 .name = symbol,
432 case NODE_OR: 940 .param_arity = array_size(node->func.params),
433 case NODE_EQ: 941 .return_arity = array_size(node->func.ret),
434 case NODE_NEQ: 942 };
435 case NODE_LT: 943
436 case NODE_GT: 944 // Build formatted type name.
437 case NODE_LE: 945 Str param_types = cstr("");
438 case NODE_GE: 946 for (sz i = 0; i < array_size(node->func.params); i++) {
439 case NODE_BITNOT: 947 Node *param = node->func.params[i];
440 case NODE_BITAND: 948 Str type = type_inference(a, param->param.type, scope);
441 case NODE_BITOR: 949 array_push(fun.params, type, a->storage);
442 case NODE_BITXOR: 950 param_types = str_concat(param_types, type, a->storage);
443 case NODE_BITLSHIFT: 951 if (i != array_size(node->func.params) - 1) {
444 case NODE_BITRSHIFT: { 952 param_types = str_concat(param_types, cstr(" "), a->storage);
445 if (node->binary.left) { 953 }
446 typecheck_returns(a, node->binary.left, expected); 954 }
955 if (!param_types.size) {
956 param_types = cstr("nil");
957 }
958 Str ret_type = cstr("");
959 for (sz i = 0; i < array_size(node->func.ret); i++) {
960 Node *expr = node->func.ret[i];
961 Str type = expr->value.str;
962 array_push(fun.returns, type, a->storage);
963 ret_type = str_concat(ret_type, type, a->storage);
964 if (i != array_size(node->func.ret) - 1) {
965 ret_type = str_concat(ret_type, cstr(","), a->storage);
966 }
967 }
968 if (!ret_type.size) {
969 ret_type = cstr("nil");
970 }
971 fun.type = param_types;
972 fun.type = str_concat(fun.type, cstr(" -> "), a->storage);
973 fun.type = str_concat(fun.type, ret_type, a->storage);
974 fun.type = str_concat(fun.type, cstr(")"), a->storage);
975 fun.type = str_concat(cstr("("), fun.type, a->storage);
976 fun.ret_type = ret_type;
977
978 // Register the function in the function table and symbols table.
979 FunMap *m = funmap_insert(&a->fun_map, id, fun, a->storage);
980 a->n_funcs++;
981 symmap_insert(&scope->symbols, symbol,
982 (Symbol){
983 .scope_id = scope->id,
984 .id = fun.id,
985 .name = fun.name,
986 .type = fun.type,
987 .kind = SYM_FUN,
988 .fun = &m->val,
989 },
990 a->storage);
991
992 // Register the type signature on the symbol table.
993 Type type = {
994 .id = fun.type,
995 .size = WORD_SIZE,
996 .element_size = WORD_SIZE,
997 };
998 symmap_insert(&scope->symbols, type.id,
999 (Symbol){
1000 .scope_id = scope->id,
1001 .id = type.id,
1002 .name = type.id,
1003 .type = cstr(":T"),
1004 .kind = SYM_TYPE,
1005 .t = type,
1006 },
1007 a->storage);
1008
1009 node->func.name->unique_name = id;
1010 return id;
1011}
1012
1013Str
1014analyze_function(Analyzer *a, Node *node, Scope *scope) {
1015 node->type = cstr("nil");
1016 if (scope->parent != NULL) {
1017 declare_fun(a, node, scope);
1018 }
1019 Str fun_name = node->func.name->value.str;
1020 FindSymbolResult res = find_symbol(scope, fun_name);
1021 Str fun_id = res.map->val.id;
1022
1023 // Prepare new analysis context.
1024 Function *prev_function = a->current_function;
1025 FunMap *m = funmap_lookup(&a->fun_map, fun_id);
1026 a->current_function = &m->val;
1027 scope = scope_alloc(a, scope);
1028 scope->name = fun_id;
1029
1030 for (sz i = 0; i < array_size(node->func.params); i++) {
1031 Node *param = node->func.params[i];
1032 Str var_name = param->param.name->value.str;
1033
1034 // Self referencing.
1035 if (str_eq(var_name, fun_name)) {
1036 SEMANTIC_ERROR(a, param, "can't override self function name '%s'",
1037 var_name);
1038 }
1039
1040 // Overrides.
1041 {
1042 FindSymbolResult res = find_symbol(scope, var_name);
1043 if (res.map) {
1044 SymbolKind kind = res.map->val.kind;
1045 if (kind == SYM_BUILTIN_TYPE) {
1046 SEMANTIC_ERROR(a, param, "can't override builtin type '%s'",
1047 var_name);
1048 }
1049 if (kind == SYM_BUILTIN_FUN) {
1050 SEMANTIC_ERROR(a, param,
1051 "can't override builtin function '%s'",
1052 var_name);
1053 }
447 } 1054 }
448 if (node->binary.right) { 1055 }
449 typecheck_returns(a, node->binary.right, expected); 1056
1057 // Uniqueness.
1058 if (symmap_lookup(&scope->symbols, var_name)) {
1059 SEMANTIC_ERROR(a, param,
1060 "symbol '%s' already exists in current scope",
1061 var_name);
1062 }
1063 Str var_type = type_inference(a, param->param.type, scope);
1064 Str id = register_var(a, node, scope, var_name, var_type);
1065
1066 FindSymbolResult res = find_symbol(scope, var_type);
1067 var_type = res.map->val.t.id;
1068 param->param.name->type = var_type;
1069 param->param.name->unique_name = id;
1070 param->type = var_type;
1071 param->unique_name = id;
1072 }
1073 a->current_function->param_offset = a->current_function->var_offset;
1074
1075 for (sz i = 0; i < array_size(node->func.ret); i++) {
1076 Node *expr = node->func.ret[i];
1077 type_inference(a, expr, scope);
1078 }
1079
1080 Str ret_type = cstr("nil");
1081 if (node->func.body->kind == NODE_BLOCK) {
1082 for (sz i = 0; i < array_size(node->func.body->elements); i++) {
1083 Node *expr = node->func.body->elements[i];
1084 ret_type = type_inference(a, expr, scope);
1085 if (str_has_prefix(ret_type, cstr("ret:"))) {
1086 ret_type = str_remove_prefix(ret_type, cstr("ret:"));
1087 break;
450 } 1088 }
451 } break; 1089 }
452 default: break; 1090 } else {
1091 ret_type = type_inference(a, node->func.body, scope);
1092 }
1093
1094 // Ensure main body return matches the prototype.
1095 node->func.body->type = ret_type;
1096 Str fun_ret = a->current_function->ret_type;
1097 if (!str_eq(fun_ret, ret_type)) {
1098 bool int_types = strset_lookup(&a->integer_types, fun_ret) &&
1099 strset_lookup(&a->integer_types, ret_type);
1100 bool float_types = strset_lookup(&a->float_types, fun_ret) &&
1101 strset_lookup(&a->float_types, ret_type);
1102 if (!int_types && !float_types) {
1103 SEMANTIC_ERROR(a, node,
1104 "returning type mismatch %s "
1105 "expected %s",
1106 ret_type, fun_ret);
1107 }
453 } 1108 }
1109
1110 // Restore old context.
1111 a->current_function = prev_function;
1112 return node->type;
454} 1113}
455 1114
456Str 1115Str
457type_inference(Analyzer *a, Node *node, Scope *scope) { 1116type_inference(Analyzer *a, Node *node, Scope *scope) {
458 assert(a); 1117 assert(a);
459 assert(scope); 1118 assert(scope);
460 if (!node || a->err) { 1119 if (!node || a->panic) {
461 return cstr(""); 1120 return cstr("");
462 } 1121 }
463 // NOTE: For now we are not going to do implicit numeric conversions. 1122#if DEBUG == 1
1123 println("Typechecking node: %s", node_str[node->kind]);
1124#endif
464 switch (node->kind) { 1125 switch (node->kind) {
465 case NODE_LET: { 1126 case NODE_LET: return analyze_let(a, node, scope);
466 node->type = cstr("nil"); 1127 case NODE_SET: return analyze_set(a, node, scope);
467 node->var.name->parent = node;
468 Str symbol = node->var.name->value.str;
469 if (symmap_lookup(&scope->symbols, symbol)) {
470 eprintln(
471 "%s:%d:%d: error: symbol '%s' already exists in current "
472 "scope ",
473 a->file_name, node->var.name->line, node->var.name->col,
474 symbol);
475 a->err = true;
476 return cstr("");
477 }
478 if (node->var.type) {
479 node->var.type->parent = node;
480 Str type_name = node->var.type->value.str;
481 SymbolMap *type = find_type(scope, type_name);
482 if (type == NULL) {
483 eprintln("%s:%d:%d: error: unknown type '%s'", a->file_name,
484 node->var.type->line, node->var.type->col,
485 type_name);
486 a->err = true;
487 return cstr("");
488 }
489 if (node->var.type->is_ptr) {
490 type_name = str_concat(cstr("@"), type_name, a->storage);
491 }
492 if (node->var.type->kind == NODE_ARR_TYPE) {
493 type_name = str_concat(cstr("@"), type_name, a->storage);
494 if (node->var.type->sym.arr_size->value.i == 0) {
495 eprintln("%s:%d:%d: error: zero sized array '%s'",
496 a->file_name, node->var.type->line,
497 node->var.type->col, symbol);
498 a->err = true;
499 return cstr("");
500 }
501 // TODO: register array in scope
502 }
503 if (node->var.val) {
504 node->var.val->parent = node;
505 Str type = type_inference(a, node->var.val, scope);
506 if (!type.size) {
507 eprintln(
508 "%s:%d:%d: error: can't bind `nil` to variable "
509 "'%s'",
510 a->file_name, node->var.type->line,
511 node->var.type->col, symbol);
512 a->err = true;
513 return cstr("");
514 }
515 if (!str_eq(type, type_name)) {
516 if (!(strset_lookup(&a->integer_types, type) &&
517 strset_lookup(&a->integer_types, type_name)) &&
518 !(strset_lookup(&a->numeric_types, type) &&
519 strset_lookup(&a->numeric_types, type_name))) {
520 // Special case, enums can be treated as ints.
521 FindEnumResult res = find_enum(scope, type_name);
522 if (!(res.map && str_eq(type, cstr("int")))) {
523 eprintln(
524 "%s:%d:%d: error: type mismatch, trying to "
525 "assing "
526 "%s"
527 " to a variable of type %s",
528 a->file_name, node->var.type->line,
529 node->var.type->col, type, type_name);
530 a->err = true;
531 return cstr("");
532 }
533 }
534 }
535 }
536 symmap_insert(&scope->symbols, symbol,
537 (Symbol){
538 .name = type_name,
539 .kind = SYM_VAR,
540 },
541 a->storage);
542 node->var.name->type = type_name;
543 symbol = str_concat(cstr("."), symbol, a->storage);
544 symbol = str_concat(symbol, str_from_int(scope->id, a->storage),
545 a->storage);
546 node->unique_name = symbol;
547 return node->type;
548 }
549
550 // We don't know the type for this symbol, perform inference.
551 node->var.val->parent = node;
552 Str type = type_inference(a, node->var.val, scope);
553 if (!type.size || str_eq(type, cstr("nil")) ||
554 str_has_prefix(type, cstr("ret:"))) {
555 eprintln(
556 "%s:%d:%d: error: can't bind `nil` to variable "
557 "'%s'",
558 a->file_name, node->line, node->col, symbol);
559 a->err = true;
560 return cstr("");
561 }
562 symmap_insert(&scope->symbols, symbol,
563 (Symbol){.name = type, .kind = SYM_VAR}, a->storage);
564 node->var.name->type = type;
565 symbol = str_concat(cstr("."), symbol, a->storage);
566 symbol = str_concat(symbol, str_from_int(scope->id, a->storage),
567 a->storage);
568 node->unique_name = symbol;
569 return node->type;
570 } break;
571 case NODE_SET: {
572 node->var.name->parent = node;
573 node->var.val->parent = node;
574 Str name = type_inference(a, node->var.name, scope);
575 Str val = type_inference(a, node->var.val, scope);
576 if (!str_eq(name, val)) {
577 eprintln(
578 "%s:%d:%d: error: type mismatch, trying to assing "
579 "%s"
580 " to a variable of type %s",
581 a->file_name, node->line, node->col, val, name);
582 a->err = true;
583 return cstr("");
584 }
585 Str symbol = node->var.name->value.str;
586 FindSymbolResult sym = find_symbol(scope, symbol);
587 node->unique_name = str_concat(cstr("."), symbol, a->storage);
588 node->unique_name =
589 str_concat(node->unique_name,
590 str_from_int(sym.scope->id, a->storage), a->storage);
591 node->type = cstr("nil");
592 return node->type;
593 } break;
594 case NODE_STRUCT: { 1128 case NODE_STRUCT: {
595 node->type = cstr("nil"); 1129 if (scope->id != 0) {
596 Str symbol = node->value.str; 1130 register_struct(a, node, scope);
597 if (symmap_lookup(&scope->symbols, symbol) != NULL) {
598 eprintln(
599 "%s:%d:%d: error: struct '%s' already exists in current "
600 "scope",
601 a->file_name, node->line, node->col, symbol);
602 a->err = true;
603 return cstr("");
604 }
605 structmap_insert(&scope->structs, symbol, (Struct){.name = symbol},
606 a->storage);
607 for (sz i = 0; i < array_size(node->struct_field); i++) {
608 Node *field = node->struct_field[i];
609 field->parent = node;
610 typecheck_field(a, field, scope, symbol);
611 } 1131 }
612 symmap_insert(&scope->symbols, symbol,
613 (Symbol){.name = symbol, .kind = SYM_STRUCT},
614 a->storage);
615 return node->type;
616 } break;
617 case NODE_ENUM: {
618 node->type = cstr("nil"); 1132 node->type = cstr("nil");
619 Str symbol = node->value.str; 1133 return cstr("nil");
620 if (symmap_lookup(&scope->symbols, symbol) != NULL) {
621 eprintln(
622 "%s:%d:%d: error: enum '%s' already exists in current "
623 "scope",
624 a->file_name, node->line, node->col, symbol);
625 a->err = true;
626 return cstr("");
627 }
628 enummap_insert(&scope->enums, symbol,
629 (Enum){
630 .name = symbol,
631 .val = node->field.val,
632 },
633 a->storage);
634 for (sz i = 0; i < array_size(node->struct_field); i++) {
635 Node *field = node->struct_field[i];
636 field->parent = node;
637 Str field_name = str_concat(symbol, cstr("."), a->storage);
638 field_name =
639 str_concat(field_name, field->value.str, a->storage);
640 if (enummap_lookup(&scope->enums, field_name)) {
641 eprintln("%s:%d:%d: error: enum field '%s' already exists",
642 a->file_name, field->line, field->col, field_name);
643 a->err = true;
644 }
645 if (field->field.val) {
646 Str type = type_inference(a, field->field.val, scope);
647 if (!str_eq(type, cstr("int"))) {
648 eprintln(
649 "%s:%d:%d: error: non int enum value for '%s.%s'",
650 a->file_name, field->line, field->col, symbol,
651 field_name);
652 a->err = true;
653 }
654 }
655 enummap_insert(&scope->enums, field_name,
656 (Enum){.name = field_name}, a->storage);
657 symmap_insert(
658 &scope->symbols, field_name,
659 (Symbol){.name = field_name, .kind = SYM_ENUM_FIELD},
660 a->storage);
661 field->type = symbol;
662 }
663 symmap_insert(&scope->symbols, symbol,
664 (Symbol){.name = symbol, .kind = SYM_ENUM},
665 a->storage);
666 return node->type;
667 } break;
668 case NODE_IF: {
669 node->ifelse.cond->parent = node;
670 node->ifelse.expr_true->parent = node;
671 Str cond_type = type_inference(a, node->ifelse.cond, scope);
672 if (!str_eq(cond_type, cstr("bool"))) {
673 emit_semantic_error(
674 a, node->ifelse.cond,
675 cstr("non boolean expression on if condition"));
676 return cstr("");
677 }
678 if (node->ifelse.expr_true->kind == NODE_BLOCK) {
679 node->type = type_inference(a, node->ifelse.expr_true, scope);
680 } else {
681 Scope *next = typescope_alloc(a, scope);
682 node->type = type_inference(a, node->ifelse.expr_true, next);
683 }
684 if (str_has_prefix(node->type, cstr("ret:")) ||
685 str_has_prefix(node->type, cstr("flow:"))) {
686 node->type = cstr("nil");
687 }
688 if (node->ifelse.expr_else) {
689 node->ifelse.expr_else->parent = node;
690 Str else_type;
691 if (node->ifelse.expr_else->kind == NODE_BLOCK) {
692 else_type =
693 type_inference(a, node->ifelse.expr_else, scope);
694 } else {
695 Scope *next = typescope_alloc(a, scope);
696 else_type = type_inference(a, node->ifelse.expr_else, next);
697 }
698 if (!str_eq(node->type, else_type)) {
699 emit_semantic_error(
700 a, node, cstr("mismatch types for if/else branches"));
701 return cstr("");
702 }
703 }
704
705 // If it returns a value, verify it contains an `else` statement.
706 if (!str_eq(node->type, cstr("nil"))) {
707 if (!node->ifelse.expr_else) {
708 emit_semantic_error(
709 a, node,
710 cstr("missing else statement in if expression"));
711 return cstr("");
712 }
713 }
714 return node->type;
715 } break;
716 case NODE_WHILE: {
717 node->loop.cond->parent = node;
718 node->loop.expr->parent = node;
719 Str cond_type = type_inference(a, node->loop.cond, scope);
720 if (!str_eq(cond_type, cstr("bool"))) {
721 emit_semantic_error(
722 a, node->loop.cond,
723 cstr("non boolean expression on while condition"));
724 return cstr("");
725 }
726 if (node->loop.expr->kind != NODE_BLOCK) {
727 scope = typescope_alloc(a, scope);
728 }
729 type_inference(a, node->loop.expr, scope);
730 node->type = cstr("nil");
731 return node->type;
732 } break;
733 case NODE_COND: {
734 Str previous = cstr("");
735 bool has_else = false;
736 for (sz i = 0; i < array_size(node->match.cases); i++) {
737 Node *expr = node->match.cases[i];
738 expr->parent = node;
739 Str next = type_inference(a, expr, scope);
740 if (i != 0 && !str_eq(next, previous)) {
741 emit_semantic_error(
742 a, node,
743 cstr("non-matching types for cond expressions"));
744 return cstr("");
745 }
746 if (!expr->case_entry.cond) {
747 has_else = true;
748 }
749 previous = next;
750 }
751 // If it returns a value, verify it contains an `else` statement.
752 if (!str_eq(node->type, cstr("nil")) &&
753 !str_has_prefix(node->type, cstr("ret:")) &&
754 !str_has_prefix(node->type, cstr("flow:"))) {
755 if (!has_else) {
756 emit_semantic_error(
757 a, node,
758 cstr("missing else statement in cond expression"));
759 return cstr("");
760 }
761 }
762 node->type = previous;
763 return node->type;
764 } break; 1134 } break;
1135 case NODE_IF: return analyze_ifelse(a, node, scope);
1136 case NODE_WHILE: return analyze_while(a, node, scope);
1137 case NODE_COND: return analyze_cond(a, node, scope);
765 case NODE_MATCH: { 1138 case NODE_MATCH: {
766 node->match.expr->parent = node; 1139 // Str e = type_inference(a, node->match.expr, scope);
767 Str e = type_inference(a, node->match.expr, scope); 1140 // if (str_eq(e, cstr("Int"))) {
768 if (str_eq(e, cstr("int"))) { 1141 // // Integer matching.
769 // Integer matching. 1142 // for (sz i = 0; i < array_size(node->match.cases); i++) {
770 for (sz i = 0; i < array_size(node->match.cases); i++) { 1143 // Node *field = node->match.cases[i];
771 Node *field = node->match.cases[i]; 1144 // if (field->case_entry.cond) {
772 field->parent = node; 1145 // if (field->case_entry.cond->kind != NODE_NUM_INT &&
773 if (field->case_entry.cond) { 1146 // field->case_entry.cond->kind != NODE_NUM_UINT) {
774 if (field->case_entry.cond->kind != NODE_NUM_INT && 1147 // emit_semantic_error(
775 field->case_entry.cond->kind != NODE_NUM_UINT) { 1148 // a, field->case_entry.cond,
776 emit_semantic_error( 1149 // cstr(
777 a, field->case_entry.cond, 1150 // "non-integer or enum types on match
778 cstr( 1151 // case"));
779 "non-integer or enum types on match case")); 1152 // }
780 } 1153 // }
781 } 1154 // }
782 } 1155 // } else {
783 } else { 1156 // // Get enum type and de-structure the match.
784 // Get enum type and de-structure the match. 1157 // FindEnumResult res = find_enum(scope, e);
785 FindEnumResult res = find_enum(scope, e); 1158 // Str enum_prefix =
786 Str enum_prefix = 1159 // str_concat(res.map->val.name, cstr("."), a->storage);
787 str_concat(res.map->val.name, cstr("."), a->storage); 1160 // for (sz i = 0; i < array_size(node->match.cases); i++) {
788 for (sz i = 0; i < array_size(node->match.cases); i++) { 1161 // Node *field = node->match.cases[i];
789 Node *field = node->match.cases[i]; 1162 // if (field->case_entry.cond) {
790 field->parent = node; 1163 // Str field_name = str_concat(
791 if (field->case_entry.cond) { 1164 // enum_prefix, field->case_entry.cond->value.str,
792 Str field_name = str_concat( 1165 // a->storage);
793 enum_prefix, field->case_entry.cond->value.str, 1166 // if (!enummap_lookup(&res.scope->enums, field_name)) {
794 a->storage); 1167 // eprintln("%s:%d:%d: error: unknown enum field
795 if (!enummap_lookup(&res.scope->enums, field_name)) { 1168 // '%s'",
796 eprintln("%s:%d:%d: error: unknown enum field '%s'", 1169 // a->file_name,
797 a->file_name, field->case_entry.cond->line, 1170 // field->case_entry.cond->line,
798 field->case_entry.cond->col, field_name); 1171 // field->case_entry.cond->col,
799 a->err = true; 1172 // field_name);
800 } 1173 // a->err = true;
801 } 1174 // }
802 } 1175 // }
803 } 1176 // }
804 Str previous = cstr(""); 1177 // }
805 for (sz i = 0; i < array_size(node->match.cases); i++) { 1178 // Str previous = cstr("");
806 Node *expr = node->match.cases[i]; 1179 // for (sz i = 0; i < array_size(node->match.cases); i++) {
807 expr->parent = node; 1180 // Node *expr = node->match.cases[i];
808 Str next = type_inference(a, expr, scope); 1181 // Str next = type_inference(a, expr, scope);
809 if (i != 0 && !str_eq(next, previous)) { 1182 // if (i != 0 && !str_eq(next, previous)) {
810 emit_semantic_error( 1183 // emit_semantic_error(
811 a, node, 1184 // a, node,
812 cstr("non-matching types for match expressions")); 1185 // cstr("non-matching types for match expressions"));
813 return cstr(""); 1186 // return cstr("");
814 } 1187 // }
815 previous = next; 1188 // previous = next;
816 } 1189 // }
817 node->type = previous; 1190 // node->type = previous;
818 return node->type; 1191 // return node->type;
819 } break; 1192 } break;
820 case NODE_CASE_MATCH: { 1193 case NODE_CASE_MATCH: {
821 if (node->case_entry.expr->kind != NODE_BLOCK) { 1194 if (node->case_entry.expr->kind != NODE_BLOCK) {
822 scope = typescope_alloc(a, scope); 1195 scope = scope_alloc(a, scope);
823 } 1196 }
824 node->case_entry.expr->parent = node;
825 node->type = type_inference(a, node->case_entry.expr, scope); 1197 node->type = type_inference(a, node->case_entry.expr, scope);
826 return node->type; 1198 return node->type;
827 } break; 1199 } break;
828 case NODE_CASE_COND: { 1200 case NODE_CASE_COND: {
829 node->case_entry.expr->parent = node;
830 if (node->case_entry.cond) { 1201 if (node->case_entry.cond) {
831 node->case_entry.cond->parent = node;
832 Str cond = type_inference(a, node->case_entry.cond, scope); 1202 Str cond = type_inference(a, node->case_entry.cond, scope);
833 if (!str_eq(cond, cstr("bool"))) { 1203 if (!str_eq(cond, cstr("Bool"))) {
834 emit_semantic_error(a, node, 1204 SEMANTIC_ERROR(a, node,
835 cstr("non-boolean case condition")); 1205 "non-boolean expression on case condition");
836 } 1206 }
837 } 1207 }
838 if (node->case_entry.expr->kind != NODE_BLOCK) { 1208 if (node->case_entry.expr->kind != NODE_BLOCK) {
839 scope = typescope_alloc(a, scope); 1209 scope = scope_alloc(a, scope);
840 } 1210 }
841 node->type = type_inference(a, node->case_entry.expr, scope); 1211 node->type = type_inference(a, node->case_entry.expr, scope);
842 return node->type; 1212 return node->type;
843 } break; 1213 } break;
844 case NODE_TRUE: 1214 case NODE_TRUE:
845 case NODE_FALSE: { 1215 case NODE_FALSE: {
846 node->type = cstr("bool"); 1216 node->type = cstr("Bool");
847 return node->type; 1217 return node->type;
848 } break; 1218 } break;
849 case NODE_NIL: { 1219 case NODE_NIL: {
@@ -853,23 +1223,18 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
853 case NODE_NOT: 1223 case NODE_NOT:
854 case NODE_AND: 1224 case NODE_AND:
855 case NODE_OR: { 1225 case NODE_OR: {
856 node->binary.left->parent = node;
857 Str left = type_inference(a, node->binary.left, scope); 1226 Str left = type_inference(a, node->binary.left, scope);
858 if (!str_eq(left, cstr("bool"))) { 1227 if (!str_eq(left, cstr("Bool"))) {
859 emit_semantic_error(a, node, 1228 SEMANTIC_ERROR(a, node, "expected bool on logic expression");
860 cstr("expected bool on logic expression"));
861 return cstr("");
862 } 1229 }
863 if (node->binary.right) { 1230 if (node->binary.right) {
864 node->binary.right->parent = node;
865 Str right = type_inference(a, node->binary.right, scope); 1231 Str right = type_inference(a, node->binary.right, scope);
866 if (!str_eq(right, cstr("bool"))) { 1232 if (!str_eq(right, cstr("Bool"))) {
867 emit_semantic_error( 1233 SEMANTIC_ERROR(a, node,
868 a, node, cstr("expected bool on logic expression")); 1234 "expected bool on logic expression");
869 return cstr("");
870 } 1235 }
871 } 1236 }
872 node->type = cstr("bool"); 1237 node->type = cstr("Bool");
873 return node->type; 1238 return node->type;
874 } break; 1239 } break;
875 case NODE_EQ: 1240 case NODE_EQ:
@@ -878,25 +1243,35 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
878 case NODE_GT: 1243 case NODE_GT:
879 case NODE_LE: 1244 case NODE_LE:
880 case NODE_GE: { 1245 case NODE_GE: {
881 node->binary.left->parent = node;
882 node->binary.right->parent = node;
883 Str left = type_inference(a, node->binary.left, scope); 1246 Str left = type_inference(a, node->binary.left, scope);
884 Str right = type_inference(a, node->binary.right, scope); 1247 Str right = type_inference(a, node->binary.right, scope);
1248 // Enable pointer comparison.
1249 if (str_has_prefix(left, cstr("@"))) {
1250 left = cstr("Ptr");
1251 }
1252 if (str_has_prefix(right, cstr("@"))) {
1253 right = cstr("Ptr");
1254 }
885 if (!str_eq(left, right)) { 1255 if (!str_eq(left, right)) {
886 emit_semantic_error( 1256 bool int_types = strset_lookup(&a->integer_types, left) &&
887 a, node, cstr("mismatched types on binary expression")); 1257 strset_lookup(&a->integer_types, right);
888 return cstr(""); 1258 bool float_types = strset_lookup(&a->float_types, left) &&
1259 strset_lookup(&a->float_types, right);
1260 if (!int_types && !float_types) {
1261 SEMANTIC_ERROR(
1262 a, node,
1263 "mismatched types on comparison expression: %s / %s",
1264 left, right);
1265 }
889 } 1266 }
890 node->type = cstr("bool"); 1267 node->type = cstr("Bool");
891 return node->type; 1268 return node->type;
892 } break; 1269 } break;
893 case NODE_BITNOT: { 1270 case NODE_BITNOT: {
894 node->binary.left->parent = node;
895 Str left = type_inference(a, node->binary.left, scope); 1271 Str left = type_inference(a, node->binary.left, scope);
896 if (!strset_lookup(&a->integer_types, left)) { 1272 if (!strset_lookup(&a->integer_types, left)) {
897 emit_semantic_error( 1273 SEMANTIC_ERROR(a, node,
898 a, node, cstr("non integer type on bit twiddling expr")); 1274 "non-integer type on bit twiddling expr");
899 return cstr("");
900 } 1275 }
901 node->type = left; 1276 node->type = left;
902 return node->type; 1277 return node->type;
@@ -906,15 +1281,12 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
906 case NODE_BITXOR: 1281 case NODE_BITXOR:
907 case NODE_BITLSHIFT: 1282 case NODE_BITLSHIFT:
908 case NODE_BITRSHIFT: { 1283 case NODE_BITRSHIFT: {
909 node->binary.left->parent = node;
910 node->binary.right->parent = node;
911 Str left = type_inference(a, node->binary.left, scope); 1284 Str left = type_inference(a, node->binary.left, scope);
912 Str right = type_inference(a, node->binary.right, scope); 1285 Str right = type_inference(a, node->binary.right, scope);
913 if (!strset_lookup(&a->integer_types, left) || 1286 if (!strset_lookup(&a->integer_types, left) ||
914 !strset_lookup(&a->integer_types, right)) { 1287 !strset_lookup(&a->integer_types, right)) {
915 emit_semantic_error( 1288 SEMANTIC_ERROR(a, node,
916 a, node, cstr("non integer type on bit twiddling expr")); 1289 "non-integer type on bit twiddling expr");
917 return cstr("");
918 } 1290 }
919 node->type = left; 1291 node->type = left;
920 return node->type; 1292 return node->type;
@@ -924,248 +1296,140 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
924 case NODE_DIV: 1296 case NODE_DIV:
925 case NODE_MUL: 1297 case NODE_MUL:
926 case NODE_MOD: { 1298 case NODE_MOD: {
927 node->binary.left->parent = node;
928 node->binary.right->parent = node;
929 Str left = type_inference(a, node->binary.left, scope); 1299 Str left = type_inference(a, node->binary.left, scope);
930 Str right = type_inference(a, node->binary.right, scope); 1300 Str right = type_inference(a, node->binary.right, scope);
1301 // Enable pointer arithmetic.
1302 if (str_has_prefix(left, cstr("@"))) {
1303 left = cstr("Ptr");
1304 }
1305 if (str_has_prefix(right, cstr("@"))) {
1306 right = cstr("Ptr");
1307 }
931 if (!strset_lookup(&a->numeric_types, left) || 1308 if (!strset_lookup(&a->numeric_types, left) ||
932 !strset_lookup(&a->numeric_types, right)) { 1309 !strset_lookup(&a->numeric_types, right)) {
933 emit_semantic_error( 1310 SEMANTIC_ERROR(
934 a, node, cstr("non numeric type on arithmetic expr")); 1311 a, node,
935 return cstr(""); 1312 "non-numeric type on arithmetic expression %s / %s", left,
1313 right);
936 } 1314 }
937 if (!str_eq(left, right)) { 1315 if (!str_eq(left, right)) {
938 emit_semantic_error( 1316 bool int_types = strset_lookup(&a->integer_types, left) &&
939 a, node, cstr("mismatched types on binary expression")); 1317 strset_lookup(&a->integer_types, right);
940 return cstr(""); 1318 bool float_types = strset_lookup(&a->float_types, left) &&
1319 strset_lookup(&a->float_types, right);
1320 if (!int_types && !float_types) {
1321 SEMANTIC_ERROR(
1322 a, node,
1323 "mismatched types on arithmetic expression: %s / %s",
1324 left, right);
1325 }
941 } 1326 }
942 node->type = left; 1327 node->type = left;
943 return node->type; 1328 return node->type;
944 } break; 1329 } break;
945 case NODE_NUM_UINT: { 1330 case NODE_NUM_UINT: {
946 node->type = cstr("int"); 1331 node->type = cstr("UInt");
947 return node->type; 1332 return node->type;
948 } break; 1333 } break;
949 case NODE_NUM_INT: { 1334 case NODE_NUM_INT: {
950 node->type = cstr("int"); 1335 node->type = cstr("Int");
951 return node->type; 1336 return node->type;
952 } break; 1337 } break;
953 case NODE_NUM_FLOAT: { 1338 case NODE_NUM_FLOAT: {
954 node->type = cstr("f64"); 1339 node->type = cstr("F64");
955 return node->type; 1340 return node->type;
956 } break; 1341 } break;
957 case NODE_STRING: { 1342 case NODE_STRING: {
958 node->type = cstr("str"); 1343 node->type = cstr("Str");
959 return node->type; 1344 return node->type;
960 } break; 1345 } break;
961 case NODE_ARR_TYPE: 1346 case NODE_TYPE: return analyze_type(a, node, scope);
962 case NODE_TYPE: { 1347 case NODE_DEREF: return analyze_deref(a, node, scope);
963 SymbolMap *type = find_type(scope, node->value.str); 1348 case NODE_SYMBOL: return analyze_symbol(a, node, scope);
964 if (!type) { 1349 case NODE_DOT: return analyze_dot(a, node, scope);
965 emit_semantic_error(a, node, cstr("unknown type")); 1350 case NODE_PTR: {
966 return cstr(""); 1351 Str base_type = type_inference(a, node->t.next, scope);
967 } 1352 node->type = base_type;
968 node->type = type->val.name; 1353 base_type = str_split(&base_type, cstr("."));
1354 FindSymbolResult sym = find_symbol(scope, base_type);
1355 if (!sym.map) {
1356 SEMANTIC_ERROR(a, node, "type not found '%s'", base_type);
1357 }
1358 Type type = sym.map->val.t;
1359 node->type = str_concat(cstr("@"), node->type, a->storage);
1360 base_type = str_concat(cstr("@"), base_type, a->storage);
1361 type.id = node->type;
1362 type.size = WORD_SIZE;
1363 symmap_insert(&scope->symbols, base_type,
1364 (Symbol){
1365 .scope_id = scope->id,
1366 .id = type.id,
1367 .name = base_type,
1368 .type = cstr(":T"),
1369 .kind = SYM_TYPE,
1370 .t = type,
1371 },
1372 a->storage);
1373 node->unique_name = node->t.next->unique_name;
969 return node->type; 1374 return node->type;
970 } break; 1375 } break;
971 case NODE_SYMBOL_IDX: 1376 case NODE_INDEX: {
972 case NODE_SYMBOL: { 1377 node->idx.next->lvalue = node->lvalue;
973 Str symbol = node->value.str; 1378 Str next_type = type_inference(a, node->idx.next, scope);
974 SymbolMap *type = find_type(scope, symbol); 1379 if (str_has_prefix(next_type, cstr("[")) ||
975 if (!type) { 1380 str_has_prefix(next_type, cstr("@["))) {
976 eprintln("%s:%d:%d: error: couldn't resolve symbol '%s'", 1381 str_split(&next_type, cstr("]"));
977 a->file_name, node->line, node->col, symbol); 1382 } else {
978 a->err = true; 1383 SEMANTIC_ERROR(a, node, "invalid index dereference for type %s",
979 return cstr(""); 1384 next_type);
980 }
981
982 FindSymbolResult sym = find_symbol(scope, symbol);
983 if (!str_eq(sym.scope->name, scope->name) && sym.scope->name.size) {
984 eprintln(
985 "%s:%d:%d: error: can't capture external local symbol '%s'",
986 a->file_name, node->line, node->col, symbol);
987 a->err = true;
988 return cstr("");
989 }
990 node->unique_name = str_concat(cstr("."), symbol, a->storage);
991 node->unique_name =
992 str_concat(node->unique_name,
993 str_from_int(sym.scope->id, a->storage), a->storage);
994
995 Str type_name = type->val.name;
996 if (node->kind == NODE_SYMBOL_IDX) {
997 node->sym.arr_size->parent = node;
998 Str idx_type = type_inference(a, node->sym.arr_size, scope);
999 if (!strset_lookup(&a->integer_types, idx_type)) {
1000 emit_semantic_error(
1001 a, node, cstr("can't resolve non integer index"));
1002 return cstr("");
1003 }
1004 type_name = str_remove_prefix(type_name, cstr("@"));
1005 }
1006 if (node->is_ptr) {
1007 type_name = str_concat(cstr("@"), type_name, a->storage);
1008 }
1009
1010 FindEnumResult e = find_enum(scope, type_name);
1011 if (e.map && str_eq(symbol, type_name)) {
1012 if (!node->sym.next) {
1013 eprintln(
1014 "%s:%d:%d: error: unspecified enum field for symbol "
1015 "'%s'",
1016 a->file_name, node->line, node->col, symbol);
1017 a->err = true;
1018 return cstr("");
1019 }
1020 // Check if there is a next and it matches the enum field.
1021 Str field = str_concat(type_name, cstr("."), a->storage);
1022 field =
1023 str_concat(field, node->sym.next->value.str, a->storage);
1024 if (!enummap_lookup(&e.scope->enums, field)) {
1025 eprintln(
1026 "%s:%d:%d: error: unknown enum field for "
1027 "'%s': %s",
1028 a->file_name, node->line, node->col, symbol,
1029 node->sym.next->value.str);
1030 a->err = true;
1031 return cstr("");
1032 }
1033
1034 node->sym.next->type = type_name;
1035 node->type = type_name;
1036 return node->sym.next->type;
1037 } 1385 }
1038 1386 Str idx_type = type_inference(a, node->idx.value, scope);
1039 FindStructResult s = find_struct(scope, type_name); 1387 if (!strset_lookup(&a->integer_types, idx_type)) {
1040 if (s.map) { 1388 SEMANTIC_ERROR(a, node,
1041 if (str_eq(symbol, type_name)) { 1389 "non-integer type '%s' on indexing operator",
1042 eprintln( 1390 idx_type);
1043 "%s:%d:%d: error: struct incomplete struct literal "
1044 "'%s', did you mean to use %s:{}?",
1045 a->file_name, node->line, node->col, symbol, symbol);
1046 a->err = true;
1047 return cstr("");
1048 } else {
1049 if (node->sym.next) {
1050 Str chain = type_name;
1051 Node *next = node;
1052 while (next->sym.next) {
1053 next = next->sym.next;
1054 chain = str_concat(chain, cstr("."), a->storage);
1055 chain =
1056 str_concat(chain, next->value.str, a->storage);
1057 }
1058 StructMap *field =
1059 structmap_lookup(&s.scope->structs, chain);
1060 if (!field) {
1061 eprintln(
1062 "%s:%d:%d: error: unknown struct field '%s'",
1063 a->file_name, node->line, node->col, chain);
1064 a->err = true;
1065 return cstr("");
1066 }
1067 Str field_type = field->val.type;
1068 if (next->kind == NODE_SYMBOL_IDX) {
1069 node->sym.arr_size->parent = node;
1070 Str idx_type =
1071 type_inference(a, next->sym.arr_size, scope);
1072 if (!strset_lookup(&a->integer_types, idx_type)) {
1073 emit_semantic_error(
1074 a, next,
1075 cstr("can't resolve non integer index"));
1076 return cstr("");
1077 }
1078 field_type =
1079 str_remove_prefix(field_type, cstr("@"));
1080 }
1081 node->type = field_type;
1082 return node->type;
1083 }
1084 }
1085 } 1391 }
1086 node->type = type_name; 1392 node->type = next_type;
1087 return node->type; 1393 return node->type;
1088 } break; 1394 } break;
1089 case NODE_STRUCT_LIT: { 1395 case NODE_STRUCT_LIT: {
1090 Str name = node->value.str; 1396 // Str name = node->value.str;
1091 FindStructResult s = find_struct(scope, name); 1397 // FindStructResult s = find_struct(scope, name);
1092 if (!s.map) { 1398 // if (!s.map) {
1093 eprintln("%s:%d:%d: error: unknown struct type '%s'", 1399 // eprintln("%s:%d:%d: error: unknown struct type '%s'",
1094 a->file_name, node->line, node->col, name); 1400 // a->file_name, node->line, node->col, name);
1095 a->err = true; 1401 // a->err = true;
1096 return cstr(""); 1402 // return cstr("");
1097 } 1403 // }
1098 1404
1099 StrSet *set = NULL; 1405 // StrSet *set = NULL;
1100 for (sz i = 0; i < array_size(node->elements); i++) { 1406 // for (sz i = 0; i < array_size(node->elements); i++) {
1101 Node *next = node->elements[i]; 1407 // Node *next = node->elements[i];
1102 next->parent = node; 1408 // Str field_name = str_concat(name, cstr("."),
1103 Str field_name = str_concat(name, cstr("."), a->storage); 1409 // a->storage); field_name =
1104 field_name = 1410 // str_concat(field_name, next->value.str,
1105 str_concat(field_name, next->value.str, a->storage); 1411 // a->storage);
1106
1107 if (strset_lookup(&set, field_name)) {
1108 eprintln(
1109 "%s:%d:%d: error: field '%s' already present in struct "
1110 "literal",
1111 a->file_name, next->line, next->col, field_name);
1112 a->err = true;
1113 } else {
1114 strset_insert(&set, field_name, a->storage);
1115 }
1116 typecheck_lit_field(a, next, s.scope, field_name);
1117 }
1118 node->type = name;
1119 return node->type;
1120 } break;
1121 case NODE_FUNCALL: {
1122 Str symbol = node->value.str;
1123 FunMap *fun = find_fun(scope, symbol);
1124 if (!fun) {
1125 eprintln(
1126 "%s:%d:%d: error: function '%s' doesn't exist in current "
1127 "scope ",
1128 a->file_name, node->line, node->col, symbol);
1129 a->err = true;
1130 return cstr("");
1131 }
1132 FindSymbolResult sym = find_symbol(scope, symbol);
1133 node->unique_name = str_concat(cstr("."), symbol, a->storage);
1134 node->unique_name =
1135 str_concat(node->unique_name,
1136 str_from_int(sym.scope->id, a->storage), a->storage);
1137 1412
1138 // Check that actual parameters typecheck 1413 // if (strset_lookup(&set, field_name)) {
1139 Str args = cstr(""); 1414 // eprintln(
1140 for (sz i = 0; i < array_size(node->elements); i++) { 1415 // "%s:%d:%d: error: field '%s' already present
1141 Node *expr = node->elements[i]; 1416 // in struct " "literal", a->file_name,
1142 expr->parent = node; 1417 // next->line, next->col, field_name);
1143 Str type = type_inference(a, expr, scope); 1418 // a->err = true;
1144 args = str_concat(args, type, a->storage); 1419 // } else {
1145 if (i != array_size(node->elements) - 1) { 1420 // strset_insert(&set, field_name, a->storage);
1146 args = str_concat(args, cstr(","), a->storage); 1421 // }
1147 } 1422 // typecheck_lit_field(a, next, s.scope, field_name);
1148 } 1423 // }
1149 if (!args.size) { 1424 // node->type = name;
1150 args = cstr("nil"); 1425 // return node->type;
1151 }
1152 Str expected = fun->val.param_type;
1153 if (!str_eq(args, expected) && !str_eq(expected, cstr("..."))) {
1154 eprintln(
1155 "%s:%d:%d: error: mismatched parameter types: %s expected "
1156 "%s",
1157 a->file_name, node->line, node->col, args, expected);
1158 a->err = true;
1159 }
1160 node->type = fun->val.return_type;
1161 return node->type;
1162 } break; 1426 } break;
1427 case NODE_FUNCALL: return analyze_funcall(a, node, scope);
1163 case NODE_BLOCK: { 1428 case NODE_BLOCK: {
1164 scope = typescope_alloc(a, scope); 1429 scope = scope_alloc(a, scope);
1165 Str type; 1430 Str type;
1166 for (sz i = 0; i < array_size(node->elements); i++) { 1431 for (sz i = 0; i < array_size(node->elements); i++) {
1167 Node *expr = node->elements[i]; 1432 Node *expr = node->elements[i];
1168 expr->parent = node;
1169 type = type_inference(a, expr, scope); 1433 type = type_inference(a, expr, scope);
1170 if (str_has_prefix(type, cstr("ret:")) || 1434 if (str_has_prefix(type, cstr("ret:")) ||
1171 str_has_prefix(type, cstr("flow:"))) { 1435 str_has_prefix(type, cstr("flow:"))) {
@@ -1177,16 +1441,29 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
1177 } break; 1441 } break;
1178 case NODE_RETURN: { 1442 case NODE_RETURN: {
1179 if (!scope->name.size) { 1443 if (!scope->name.size) {
1180 emit_semantic_error( 1444 SEMANTIC_ERROR(a, node, "return statement outside a function");
1181 a, node, cstr("return statement outside a function")); 1445 }
1182 a->err = true; 1446 sz ret_arity = array_size(node->elements);
1183 return cstr(""); 1447 if (ret_arity != a->current_function->return_arity) {
1448 SEMANTIC_ERROR(a, node,
1449 "invalid number of values in return expression");
1184 } 1450 }
1185 Str ret_type = cstr("ret:"); 1451 Str ret_type = cstr("ret:");
1186 for (sz i = 0; i < array_size(node->elements); i++) { 1452 for (sz i = 0; i < array_size(node->elements); i++) {
1187 Node *expr = node->elements[i]; 1453 Node *expr = node->elements[i];
1188 expr->parent = node;
1189 Str type = type_inference(a, expr, scope); 1454 Str type = type_inference(a, expr, scope);
1455 Str expected = a->current_function->returns[i];
1456 if (!str_eq(type, expected)) {
1457 bool int_types = strset_lookup(&a->integer_types, type) &&
1458 strset_lookup(&a->integer_types, expected);
1459 bool float_types = strset_lookup(&a->float_types, type) &&
1460 strset_lookup(&a->float_types, expected);
1461 if (!int_types && !float_types) {
1462 SEMANTIC_ERROR(a, node,
1463 "mismatched return type %s, expected %s",
1464 type, expected);
1465 }
1466 }
1190 ret_type = str_concat(ret_type, type, a->storage); 1467 ret_type = str_concat(ret_type, type, a->storage);
1191 if (i != array_size(node->elements) - 1) { 1468 if (i != array_size(node->elements) - 1) {
1192 ret_type = str_concat(ret_type, cstr(","), a->storage); 1469 ret_type = str_concat(ret_type, cstr(","), a->storage);
@@ -1211,138 +1488,17 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
1211 parent = parent->parent; 1488 parent = parent->parent;
1212 } 1489 }
1213 if (!inside_loop) { 1490 if (!inside_loop) {
1214 eprintln( 1491 SEMANTIC_ERROR(a, node,
1215 "%s:%d:%d: error: control flow statement outside a loop", 1492 "control flow statement outside a loop");
1216 a->file_name, node->line, node->col);
1217 a->err = true;
1218 return cstr("");
1219 } 1493 }
1220 node->type = cstr("flow:"); 1494 node->type = cstr("flow:nil");
1221 return node->type;
1222 } break;
1223 case NODE_FUN: {
1224 node->type = cstr("nil");
1225 Scope *prev_scope = scope;
1226 scope = typescope_alloc(a, scope);
1227 Str param_type = cstr("");
1228 for (sz i = 0; i < array_size(node->func.params); i++) {
1229 Node *param = node->func.params[i];
1230 param->parent = node;
1231 Str symbol = param->param.name->value.str;
1232 Str type = param->param.type->value.str;
1233 if (param->param.type->is_ptr) {
1234 type = str_concat(cstr("@"), type, a->storage);
1235 }
1236 if (param->param.type->kind == NODE_ARR_TYPE) {
1237 type = str_concat(cstr("@"), type, a->storage);
1238 }
1239 param->param.name->type =
1240 type_inference(a, param->param.type, scope);
1241 param->type = type;
1242 symmap_insert(&scope->symbols, symbol,
1243 (Symbol){.name = type, .kind = SYM_PARAM},
1244 a->storage);
1245 param_type = str_concat(param_type, type, a->storage);
1246 if (i != array_size(node->func.params) - 1) {
1247 param_type = str_concat(param_type, cstr(","), a->storage);
1248 }
1249 symbol = str_concat(cstr("."), symbol, a->storage);
1250 symbol = str_concat(symbol, str_from_int(scope->id, a->storage),
1251 a->storage);
1252 param->unique_name = symbol;
1253 }
1254 if (!param_type.size) {
1255 param_type = cstr("nil");
1256 }
1257 node->type_params = param_type;
1258
1259 Str ret_type = cstr("");
1260 for (sz i = 0; i < array_size(node->func.ret); i++) {
1261 Node *expr = node->func.ret[i];
1262 expr->parent = node;
1263 Str type = type_inference(a, expr, scope);
1264 if (expr->is_ptr) {
1265 type = str_concat(cstr("@"), type, a->storage);
1266 }
1267 if (expr->kind == NODE_ARR_TYPE) {
1268 type = str_concat(cstr("@"), type, a->storage);
1269 }
1270 ret_type = str_concat(ret_type, type, a->storage);
1271 if (i != array_size(node->func.ret) - 1) {
1272 ret_type = str_concat(ret_type, cstr(","), a->storage);
1273 }
1274 }
1275 if (!ret_type.size) {
1276 ret_type = cstr("nil");
1277 }
1278 node->type_returns = ret_type;
1279
1280 Str symbol = node->func.name->value.str;
1281 if (prev_scope->parent != NULL) {
1282 if (symmap_lookup(&prev_scope->symbols, symbol)) {
1283 eprintln(
1284 "%s:%d:%d: error: function '%s' already defined in "
1285 "current "
1286 "scope ",
1287 a->file_name, node->var.name->line, node->var.name->col,
1288 symbol);
1289 a->err = true;
1290 return cstr("");
1291 }
1292 symmap_insert(&prev_scope->symbols, symbol,
1293 (Symbol){.name = symbol, .kind = SYM_FUN},
1294 a->storage);
1295 }
1296 scope->name = symbol;
1297 funmap_insert(&prev_scope->funcs, symbol,
1298 (Fun){.name = symbol,
1299 .param_type = param_type,
1300 .return_type = ret_type},
1301 a->storage);
1302 symbol = str_concat(cstr("."), symbol, a->storage);
1303 symbol = str_concat(
1304 symbol, str_from_int(prev_scope->id, a->storage), a->storage);
1305 node->unique_name = symbol;
1306
1307 if (node->func.body->kind == NODE_BLOCK) {
1308 Str type;
1309 for (sz i = 0; i < array_size(node->func.body->elements); i++) {
1310 Node *expr = node->func.body->elements[i];
1311 expr->parent = node;
1312 // TODO: block early return.
1313 type = type_inference(a, expr, scope);
1314 }
1315 if (!type.size) {
1316 type = cstr("nil");
1317 }
1318 node->func.body->type = type;
1319 } else {
1320 node->func.body->parent = node;
1321 type_inference(a, node->func.body, scope);
1322 }
1323
1324 // Ensure main body return matches the prototype.
1325 Str type = str_remove_prefix(node->func.body->type, cstr("ret:"));
1326 node->func.body->type = type;
1327 if (!str_eq(type, ret_type)) {
1328 eprintln(
1329 "%s:%d:%d: error: mismatched return type %s, expected %s",
1330 a->file_name, node->line, node->col, type, ret_type);
1331 a->err = true;
1332 }
1333
1334 // Ensure ALL return statements match the function prototype.
1335 typecheck_returns(a, node->func.body, ret_type);
1336
1337 // TODO: should return statements be allowed on let blocks?
1338 return node->type; 1495 return node->type;
1339 } break; 1496 } break;
1497 case NODE_FUN: return analyze_function(a, node, scope);
1340 default: { 1498 default: {
1341 eprintln( 1499 SEMANTIC_ERROR(a, node,
1342 "%s:%d:%d: error: type inference not implemented for node " 1500 "type inference not implemented for node type: %s",
1343 "type: %s", 1501 node_str[node->kind]);
1344 a->file_name, node->line, node->col, node_str[node->kind]);
1345 a->err = true;
1346 } break; 1502 } break;
1347 } 1503 }
1348 return cstr(""); 1504 return cstr("");
@@ -1350,121 +1506,309 @@ type_inference(Analyzer *a, Node *node, Scope *scope) {
1350 1506
1351void 1507void
1352symbolic_analysis(Analyzer *a, Parser *parser) { 1508symbolic_analysis(Analyzer *a, Parser *parser) {
1353 Scope *scope = typescope_alloc(a, NULL); 1509 Scope *scope = scope_alloc(a, NULL);
1354 assert(a); 1510 assert(a);
1355 assert(parser); 1511 assert(parser);
1512 // TODO: Move to the parser or before that?
1513 Node *root = node_alloc(parser, NODE_BLOCK, (Token){0});
1514 root->expressions = parser->nodes;
1515 Function global = {
1516 .index = a->fun_gen++,
1517 .id = cstr(".global"),
1518 .name = cstr(".global"),
1519 .type = cstr("(nil -> nil)"),
1520 .body = root,
1521 };
1522 FunMap *map = funmap_insert(&a->fun_map, global.id, global, a->storage);
1523 a->n_funcs++;
1524 a->current_function = &map->val;
1356 1525
1357 // Fill builtin tables. 1526 // Fill builtin tables.
1358 Str builtin_functions[] = { 1527 Function builtin_functions[] = {
1359 cstr("print"), 1528 {
1360 cstr("println"), 1529 .id = cstr("print"),
1530 .name = cstr("print"),
1531 .type = cstr("(... -> nil)"),
1532 .ret_type = cstr("nil"),
1533 },
1534 {
1535 .id = cstr("println"),
1536 .name = cstr("println"),
1537 .type = cstr("(... -> nil)"),
1538 .ret_type = cstr("nil"),
1539 },
1540 {
1541 .id = cstr("sizeof"),
1542 .name = cstr("sizeof"),
1543 .type = cstr("(:T -> Int)"),
1544 .ret_type = cstr("Int"),
1545 .param_arity = 1,
1546 .return_arity = 1,
1547 },
1361 }; 1548 };
1362 for (sz i = 0; i < LEN(builtin_functions); i++) { 1549 for (sz i = 0; i < LEN(builtin_functions); i++) {
1363 Str symbol = builtin_functions[i]; 1550 Function fun = builtin_functions[i];
1364 symmap_insert(&scope->symbols, symbol, 1551 symmap_insert(&scope->symbols, fun.id,
1365 (Symbol){.name = symbol, .kind = SYM_BUILTIN_FUN}, 1552 (Symbol){
1366 a->storage); 1553 .scope_id = scope->id,
1367 funmap_insert(&scope->funcs, symbol, 1554 .id = fun.id,
1368 (Fun){.name = symbol, 1555 .name = fun.id,
1369 .param_type = cstr("..."), 1556 .type = fun.type,
1370 .return_type = cstr("nil")}, 1557 .kind = SYM_BUILTIN_FUN,
1558 .fun = &builtin_functions[i],
1559 },
1371 a->storage); 1560 a->storage);
1372 } 1561 }
1373 Str builtin_types[] = { 1562 Type builtin_types[] = {
1374 cstr("u8"), cstr("s8"), cstr("u16"), cstr("s16"), 1563 // Nil.
1375 cstr("u32"), cstr("s32"), cstr("u64"), cstr("s64"), 1564 {.id = cstr("nil"), .size = 0, .element_size = 0},
1376 cstr("f32"), cstr("f64"), cstr("ptr"), cstr("int"), 1565
1377 cstr("uint"), cstr("str"), cstr("bool"), cstr("nil")}; 1566 // Architecture dependant.
1567 {.id = cstr("Int"), .size = 8, .element_size = 8},
1568 {.id = cstr("UInt"), .size = 8, .element_size = 8},
1569 {.id = cstr("Ptr"), .size = 8, .element_size = 8},
1570 {.id = cstr("Str"), .size = 16, .element_size = 16},
1571
1572 // Fixed integer types.
1573 {.id = cstr("Bool"), .size = 1, .element_size = 1},
1574 {.id = cstr("U8"), .size = 1, .element_size = 1},
1575 {.id = cstr("S8"), .size = 1, .element_size = 1},
1576 {.id = cstr("U16"), .size = 2, .element_size = 2},
1577 {.id = cstr("S16"), .size = 2, .element_size = 2},
1578 {.id = cstr("U32"), .size = 4, .element_size = 4},
1579 {.id = cstr("S32"), .size = 4, .element_size = 4},
1580 {.id = cstr("U64"), .size = 8, .element_size = 8},
1581 {.id = cstr("S64"), .size = 8, .element_size = 8},
1582
1583 // Fixed float types.
1584 {.id = cstr("F32"), .size = 4, .element_size = 4},
1585 {.id = cstr("F64"), .size = 8, .element_size = 8},
1586 };
1378 for (sz i = 0; i < LEN(builtin_types); i++) { 1587 for (sz i = 0; i < LEN(builtin_types); i++) {
1379 Str type = builtin_types[i]; 1588 Type type = builtin_types[i];
1380 symmap_insert(&scope->symbols, type, 1589 symmap_insert(&scope->symbols, type.id,
1381 (Symbol){.name = type, .kind = SYM_BUILTIN_TYPE}, 1590 (Symbol){
1591 .scope_id = scope->id,
1592 .id = type.id,
1593 .name = type.id,
1594 .type = cstr(":T"),
1595 .kind = SYM_BUILTIN_TYPE,
1596 .t = type,
1597 },
1382 a->storage); 1598 a->storage);
1383 } 1599 }
1384 Str numeric_types[] = { 1600 Str numeric_types[] = {
1385 cstr("u8"), cstr("s8"), cstr("u16"), cstr("s16"), cstr("u32"), 1601 cstr("U8"), cstr("S8"), cstr("U16"), cstr("S16"), cstr("U32"),
1386 cstr("s32"), cstr("u64"), cstr("s64"), cstr("f32"), cstr("f64"), 1602 cstr("S32"), cstr("U64"), cstr("S64"), cstr("F32"), cstr("F64"),
1387 cstr("ptr"), cstr("int"), cstr("uint"), 1603 cstr("Ptr"), cstr("Int"), cstr("UInt"),
1388 }; 1604 };
1389 for (sz i = 0; i < LEN(numeric_types); i++) { 1605 for (sz i = 0; i < LEN(numeric_types); i++) {
1390 Str type = numeric_types[i]; 1606 Str type = numeric_types[i];
1391 strset_insert(&a->numeric_types, type, a->storage); 1607 strset_insert(&a->numeric_types, type, a->storage);
1392 } 1608 }
1609 Str unsigned_ints[] = {
1610 cstr("U8"), cstr("U16"), cstr("U32"),
1611 cstr("U64"), cstr("Ptr"), cstr("UInt"),
1612 };
1613 for (sz i = 0; i < LEN(unsigned_ints); i++) {
1614 Str type = unsigned_ints[i];
1615 strset_insert(&a->uint_types, type, a->storage);
1616 }
1617 Str signed_ints[] = {
1618 cstr("S8"), cstr("S16"), cstr("S32"), cstr("S64"), cstr("Int"),
1619 };
1620 for (sz i = 0; i < LEN(signed_ints); i++) {
1621 Str type = signed_ints[i];
1622 strset_insert(&a->sint_types, type, a->storage);
1623 }
1393 Str integer_types[] = { 1624 Str integer_types[] = {
1394 cstr("u8"), cstr("s8"), cstr("u16"), cstr("s16"), 1625 cstr("U8"), cstr("S8"), cstr("U16"), cstr("S16"),
1395 cstr("u32"), cstr("s32"), cstr("u64"), cstr("s64"), 1626 cstr("U32"), cstr("S32"), cstr("U64"), cstr("S64"),
1396 cstr("ptr"), cstr("int"), cstr("uint"), 1627 cstr("Ptr"), cstr("Int"), cstr("UInt"),
1397 }; 1628 };
1398 for (sz i = 0; i < LEN(integer_types); i++) { 1629 for (sz i = 0; i < LEN(integer_types); i++) {
1399 Str type = integer_types[i]; 1630 Str type = integer_types[i];
1400 strset_insert(&a->integer_types, type, a->storage); 1631 strset_insert(&a->integer_types, type, a->storage);
1401 } 1632 }
1633 Str float_types[] = {
1634 cstr("F32"),
1635 cstr("F64"),
1636 };
1637 for (sz i = 0; i < LEN(float_types); i++) {
1638 Str type = float_types[i];
1639 strset_insert(&a->float_types, type, a->storage);
1640 }
1402 // Find top level function declarations. 1641 // Find top level function declarations.
1403 for (sz i = 0; i < array_size(parser->nodes); i++) { 1642 for (sz i = 0; i < array_size(parser->nodes); i++) {
1404 Node *root = parser->nodes[i]; 1643 Node *root = parser->nodes[i];
1405 if (root->kind == NODE_FUN) { 1644 if (root->kind == NODE_FUN) {
1406 Str symbol = root->func.name->value.str; 1645 declare_fun(a, root, scope);
1407 if (symmap_lookup(&scope->symbols, symbol)) { 1646 }
1408 eprintln( 1647 if (root->kind == NODE_STRUCT) {
1409 "%s:%d:%d: error: function '%s' already defined in " 1648 register_struct(a, root, scope);
1410 "current " 1649 }
1411 "scope ", 1650 }
1412 a->file_name, root->var.name->line, root->var.name->col, 1651 if (a->err == true) {
1413 symbol); 1652 return;
1414 a->err = true; 1653 }
1415 } 1654
1416 Str param_type = cstr(""); 1655 // Recursively fill symbol tables.
1417 for (sz i = 0; i < array_size(root->func.params); i++) { 1656 for (sz i = 0; i < array_size(parser->nodes); i++) {
1418 Node *param = root->func.params[i]; 1657 Node *root = parser->nodes[i];
1419 Str type = param->param.type->value.str; 1658 type_inference(a, root, scope);
1420 if (param->param.type->is_ptr) { 1659 // NOTE: We may want to synchronize errors on type_inference for
1421 type = str_concat(cstr("@"), type, a->storage); 1660 // specific statements instead of just roots.
1422 } 1661 a->panic = false;
1423 if (param->param.type->kind == NODE_ARR_TYPE) { 1662 }
1424 type = str_concat(cstr("@"), type, a->storage); 1663}
1664
1665int
1666symbol_sort_fn(const void *a, const void *b) {
1667 const Symbol *x = a;
1668 const Symbol *y = b;
1669
1670 // Sort by kind first.
1671 {
1672 int comp = x->kind - y->kind;
1673 if (comp < 0) return -1;
1674 if (comp > 0) return 1;
1675 }
1676
1677 // Sort by scope id.
1678 {
1679 int comp = x->scope_id - y->scope_id;
1680 if (comp < 0) return -1;
1681 if (comp > 0) return 1;
1682 }
1683
1684 // Finally, sort by ID.
1685 return str_cmp(x->id, y->id);
1686}
1687
1688int
1689fun_sort_fn(const void *a, const void *b) {
1690 const Function *x = a;
1691 const Function *y = b;
1692 return x->index - y->index;
1693}
1694
1695void
1696print_symbols(Analyzer a) {
1697 Arena scratch = *a.storage;
1698 {
1699 Symbol *symbols = NULL;
1700 Symbol *types = NULL;
1701 for (sz i = 0; i < array_size(a.scopes); i++) {
1702 Scope *scope = a.scopes[i];
1703 SymbolMapIter iter = symmap_iterator(scope->symbols, &scratch);
1704 SymbolMap *m = symmap_next(&iter, &scratch);
1705 while (m) {
1706 Symbol sym = m->val;
1707#if DEBUG == 0
1708 if (sym.kind == SYM_BUILTIN_TYPE ||
1709 sym.kind == SYM_BUILTIN_FUN) {
1710 m = symmap_next(&iter, &scratch);
1711 continue;
1425 } 1712 }
1426 param_type = str_concat(param_type, type, a->storage); 1713#endif
1427 if (i != array_size(root->func.params) - 1) { 1714 if (sym.kind == SYM_STRUCT) {
1428 param_type = str_concat(param_type, cstr(","), a->storage); 1715 array_push(types, sym, &scratch);
1429 } 1716 }
1717 array_push(symbols, sym, &scratch);
1718 m = symmap_next(&iter, &scratch);
1430 } 1719 }
1431 if (!param_type.size) { 1720 }
1432 param_type = cstr("nil"); 1721 array_sort(symbols, symbol_sort_fn);
1722
1723 println(
1724 "═════════════════════════════════════════"
1725 "═════════════════════════════════════════");
1726 println("SYMBOLS");
1727 println(
1728 "═════════════════════════════════════════"
1729 "═════════════════════════════════════════");
1730 println("scope\tid\tname\ttype\tkind");
1731 for (sz i = 0; i < array_size(symbols); i++) {
1732 Symbol sym = symbols[i];
1733 if (sym.kind == SYM_TYPE || sym.kind == SYM_STRUCT) {
1734 println("%d\t%s\t%s\t%s (size %d / %d)\t%s", sym.scope_id,
1735 sym.id, sym.name, sym.type, sym.t.size,
1736 sym.t.element_size, sym_str[sym.kind]);
1737 } else if (sym.kind == SYM_STRUCT_FIELD) {
1738 println("%d\t%s\t%s\t%s (size %d offset %d)\t%s", sym.scope_id,
1739 sym.id, sym.name, sym.type, sym.t.size, sym.t.offset,
1740 sym_str[sym.kind]);
1741 } else {
1742 println("%d\t%s\t%s\t%s\t%s", sym.scope_id, sym.id, sym.name,
1743 sym.type, sym_str[sym.kind]);
1433 } 1744 }
1434 root->type_params = param_type; 1745 }
1435 1746
1436 Str ret_type = cstr(""); 1747 if (types) {
1437 for (sz i = 0; i < array_size(root->func.ret); i++) { 1748 println(
1438 Node *expr = root->func.ret[i]; 1749 "═════════════════════════════════════════"
1439 Str type = expr->value.str; 1750 "═════════════════════════════════════════");
1440 if (expr->is_ptr) { 1751 println("STRUCTS");
1441 type = str_concat(cstr("@"), type, a->storage); 1752 println(
1442 } 1753 "═════════════════════════════════════════"
1443 if (expr->kind == NODE_ARR_TYPE) { 1754 "═════════════════════════════════════════");
1444 type = str_concat(cstr("@"), type, a->storage); 1755 println("name\ttype\tsize\tpadding\telement_size");
1445 } 1756 for (sz i = 0; i < array_size(types); i++) {
1446 ret_type = str_concat(ret_type, type, a->storage); 1757 Symbol sym = types[i];
1447 if (i != array_size(root->func.ret) - 1) { 1758 println("%s\t%s\t%d\t\t%d", sym.name, sym.id, sym.t.size,
1448 ret_type = str_concat(ret_type, cstr(","), a->storage); 1759 sym.t.element_size);
1760 for (sz i = 0; i < array_size(sym.t.fields); i++) {
1761 StructField field = sym.t.fields[i];
1762 if (i < array_size(sym.t.fields) - 1) {
1763 println("├─ %s\t%s\t%d\t%d\t%d", field.name,
1764 field.type.id, field.type.size, field.padding,
1765 field.type.element_size);
1766 } else {
1767 println("└─ %s\t%s\t%d\t%d\t%d", field.name,
1768 field.type.id, field.type.size, field.padding,
1769 field.type.element_size);
1770 }
1449 } 1771 }
1450 } 1772 }
1451 if (!ret_type.size) {
1452 ret_type = cstr("nil");
1453 }
1454 funmap_insert(&scope->funcs, symbol,
1455 (Fun){.name = symbol,
1456 .param_type = param_type,
1457 .return_type = ret_type},
1458 a->storage);
1459 symmap_insert(&scope->symbols, symbol,
1460 (Symbol){.name = symbol, .kind = SYM_FUN},
1461 a->storage);
1462 } 1773 }
1463 } 1774 }
1464 // Recursively fill symbol tables. 1775
1465 for (sz i = 0; i < array_size(parser->nodes); i++) { 1776 // Print all functions.
1466 Node *root = parser->nodes[i]; 1777 Function *functions = NULL;
1467 type_inference(a, root, scope); 1778 {
1779 FunMapIter iter = funmap_iterator(a.fun_map, &scratch);
1780 FunMap *m = funmap_next(&iter, &scratch);
1781 while (m) {
1782 Function fun = m->val;
1783 array_push(functions, fun, &scratch);
1784 m = funmap_next(&iter, &scratch);
1785 }
1786 array_sort(functions, fun_sort_fn);
1787
1788 println(
1789 "═════════════════════════════════════════"
1790 "═════════════════════════════════════════");
1791 println("FUNCTIONS");
1792 println(
1793 "═════════════════════════════════════════"
1794 "═════════════════════════════════════════");
1795 for (sz i = 0; i < array_size(functions); i++) {
1796 Function fun = functions[i];
1797 println("function %d: %s %s [P: %d R: %d V: %d]", fun.index, fun.id,
1798 fun.type, fun.param_arity, fun.return_arity, fun.n_vars);
1799 if (fun.n_vars) {
1800 println("index\tname\ttype\tfrom\tto\tsize\tpadding");
1801 for (sz i = 0; i < array_size(fun.vars); i++) {
1802 Variable var = fun.vars[i];
1803 println("%d\t%s\t%s\t%x{6}\t%x{6}\t%d\t%d", var.idx,
1804 var.name, var.type.id, var.offset,
1805 var.offset + var.size, var.size, var.padding);
1806 }
1807 }
1808 println(
1809 "─────────────────────────────────────────"
1810 "─────────────────────────────────────────");
1811 }
1468 } 1812 }
1469} 1813}
1470 1814
diff --git a/src/vm.c b/src/vm.c
index 0929fb6..f80cf95 100644
--- a/src/vm.c
+++ b/src/vm.c
@@ -4,11 +4,22 @@
4#include "badlib.h" 4#include "badlib.h"
5#include "compiler.c" 5#include "compiler.c"
6 6
7//
8// The current activation record procedure for the VM is as follows:
9// [caller][callee ]
10// [ .... ][ RET VAL ][ PARAMS ][ LOCALS ][ REGISTERS ][ RET META ]
11// ^
12// frame pointer
13//
14// The caller is responsible for allocating the return memory and the
15// parameter memory and filling the param data before OP_CALL.
16//
17
7#define N_CONST 256 18#define N_CONST 256
8#define STACK_SIZE KB(64) 19#define STACK_SIZE MB(4)
9typedef struct VM { 20typedef struct VM {
10 Chunk *main; 21 Chunk *current_chunk;
11 Chunk *chunk; 22 Chunk **chunks;
12 u8 stack[STACK_SIZE]; 23 u8 stack[STACK_SIZE];
13 Instruction *ip; 24 Instruction *ip;
14 u8 *sp; 25 u8 *sp;
@@ -17,48 +28,48 @@ typedef struct VM {
17} VM; 28} VM;
18 29
19void 30void
20vm_init(VM *vm, Chunk *chunk) { 31vm_init(VM *vm, Chunk **chunks) {
21 assert(vm); 32 assert(vm);
22 assert(chunk); 33 assert(chunks);
23 assert(chunk->code); 34 Chunk *chunk = chunks[0];
24 vm->main = chunk; 35 vm->chunks = chunks;
25 vm->chunk = chunk; 36 vm->current_chunk = chunk;
26 vm->ip = vm->chunk->code; 37 vm->ip = vm->current_chunk->code;
27 vm->fp = (u64 *)vm->stack; 38 vm->fp = (u64 *)vm->stack;
28 vm->sp = vm->stack + chunk->var_off; 39 vm->sp = vm->stack + chunk->fun.var_offset;
29 vm->regs = (Constant *)vm->sp; 40 vm->regs = (Constant *)vm->sp;
30 vm->sp += sizeof(Constant) * chunk->reg_idx; 41 vm->sp += sizeof(Constant) * chunk->reg_idx;
31} 42}
32 43
33#define OP_UNARY(OP, TYPE) \ 44#define OP_UNARY(OP, TYPE) \
34 do { \ 45 do { \
35 u8 dst = instruction.dst; \ 46 sz dst = instruction.dst; \
36 u8 src_a = instruction.a; \ 47 sz src_a = instruction.a; \
37 vm->regs[dst].TYPE = OP vm->regs[src_a].TYPE; \ 48 vm->regs[dst].TYPE = OP vm->regs[src_a].TYPE; \
38 } while (0); 49 } while (0);
39 50
40#define OP_UNARY_CONST(OP, TYPE) \ 51#define OP_UNARY_CONST(OP, TYPE) \
41 do { \ 52 do { \
42 u8 dst = instruction.dst; \ 53 sz dst = instruction.dst; \
43 u8 src_a = instruction.a; \ 54 sz src_a = instruction.a; \
44 vm->regs[dst].TYPE = OP vm->chunk->constants[src_a].TYPE; \ 55 vm->regs[dst].TYPE = OP vm->current_chunk->constants[src_a].TYPE; \
45 } while (0); 56 } while (0);
46 57
47#define OP_BINARY(OP, TYPE) \ 58#define OP_BINARY(OP, TYPE) \
48 do { \ 59 do { \
49 u8 dst = instruction.dst; \ 60 sz dst = instruction.dst; \
50 u8 src_a = instruction.a; \ 61 sz src_a = instruction.a; \
51 u8 src_b = instruction.b; \ 62 sz src_b = instruction.b; \
52 vm->regs[dst].TYPE = vm->regs[src_a].TYPE OP vm->regs[src_b].TYPE; \ 63 vm->regs[dst].TYPE = vm->regs[src_a].TYPE OP vm->regs[src_b].TYPE; \
53 } while (0); 64 } while (0);
54 65
55#define OP_BINARY_CONST(OP, TYPE) \ 66#define OP_BINARY_CONST(OP, TYPE) \
56 do { \ 67 do { \
57 u8 dst = instruction.dst; \ 68 sz dst = instruction.dst; \
58 u8 src_a = instruction.a; \ 69 sz src_a = instruction.a; \
59 u8 src_b = instruction.b; \ 70 sz src_b = instruction.b; \
60 vm->regs[dst].TYPE = \ 71 vm->regs[dst].TYPE = \
61 vm->regs[src_a].TYPE OP vm->chunk->constants[src_b].TYPE; \ 72 vm->regs[src_a].TYPE OP vm->current_chunk->constants[src_b].TYPE; \
62 } while (0); 73 } while (0);
63 74
64#include <math.h> 75#include <math.h>
@@ -66,21 +77,21 @@ vm_init(VM *vm, Chunk *chunk) {
66void 77void
67vm_run(VM *vm) { 78vm_run(VM *vm) {
68 assert(vm); 79 assert(vm);
69 assert(vm->chunk); 80 assert(vm->current_chunk);
70 assert(vm->ip); 81 assert(vm->ip);
71 println("VM running..."); 82 println("VM running...");
72 while (true) { 83 while (true) {
73 Instruction instruction = *vm->ip++; 84 Instruction instruction = *vm->ip++;
74#if DEBUG == 1 85#if DEBUG == 1
75 print("IP: %d -> ", vm->ip - vm->chunk->code - 1); 86 print("IP: %d -> ", vm->ip - vm->current_chunk->code - 1);
76 disassemble_instruction(instruction); 87 disassemble_instruction(*vm->current_chunk, instruction);
77#endif 88#endif
78 89
79 switch (instruction.op) { 90 switch (instruction.op) {
80 case OP_LD64K: { 91 case OP_LDCONST: {
81 u8 dst = instruction.dst; 92 sz dst = instruction.dst;
82 u8 src_a = instruction.a; 93 sz src_a = instruction.a;
83 vm->regs[dst].i = vm->chunk->constants[src_a].i; 94 vm->regs[dst].i = vm->current_chunk->constants[src_a].i;
84 } break; 95 } break;
85 case OP_NOT: OP_UNARY(!, i) break; 96 case OP_NOT: OP_UNARY(!, i) break;
86 case OP_BITNOT: OP_UNARY(~, i) break; 97 case OP_BITNOT: OP_UNARY(~, i) break;
@@ -107,11 +118,11 @@ vm_run(VM *vm) {
107 case OP_MULF: OP_BINARY(*, f) break; 118 case OP_MULF: OP_BINARY(*, f) break;
108 case OP_DIVF: OP_BINARY(/, f) break; 119 case OP_DIVF: OP_BINARY(/, f) break;
109 case OP_MODF: { 120 case OP_MODF: {
110 u8 dst = instruction.dst; 121 sz dst = instruction.dst;
111 u8 src_a = instruction.a; 122 sz src_a = instruction.a;
112 u8 src_b = instruction.b; 123 sz src_b = instruction.b;
113 vm->regs[dst].f = 124 vm->regs[dst].f = fmod(vm->regs[src_a].f,
114 fmod(vm->regs[src_a].f, vm->chunk->constants[src_b].f); 125 vm->current_chunk->constants[src_b].f);
115 } break; 126 } break;
116 case OP_NOTI: OP_UNARY_CONST(!, i) break; 127 case OP_NOTI: OP_UNARY_CONST(!, i) break;
117 case OP_BITNOTI: OP_UNARY_CONST(~, i) break; 128 case OP_BITNOTI: OP_UNARY_CONST(~, i) break;
@@ -138,159 +149,334 @@ vm_run(VM *vm) {
138 case OP_MULFI: OP_BINARY_CONST(*, f) break; 149 case OP_MULFI: OP_BINARY_CONST(*, f) break;
139 case OP_DIVFI: OP_BINARY_CONST(/, f) break; 150 case OP_DIVFI: OP_BINARY_CONST(/, f) break;
140 case OP_MODFI: { 151 case OP_MODFI: {
141 u8 dst = instruction.dst; 152 sz dst = instruction.dst;
142 u8 src_a = instruction.a; 153 sz src_a = instruction.a;
143 u8 src_b = instruction.b; 154 sz src_b = instruction.b;
144 vm->regs[dst].f = 155 vm->regs[dst].f = fmod(vm->regs[src_a].f,
145 fmod(vm->regs[src_a].f, vm->chunk->constants[src_b].f); 156 vm->current_chunk->constants[src_b].f);
146 } break; 157 } break;
147 case OP_STGVAR: { 158 case OP_STGVAR: {
148 u8 dst = instruction.dst; 159 sz dst = instruction.dst;
149 u8 src = instruction.a; 160 sz src = instruction.a;
150 Variable var = vm->main->vars[dst]; 161 Variable var = vm->chunks[0]->fun.vars[dst];
151 s64 *stack = (s64 *)&vm->stack[var.offset]; 162 s64 *stack = (s64 *)&vm->stack[var.offset];
152 *stack = vm->regs[src].i; 163 *stack = vm->regs[src].i;
153 } break; 164 } break;
154 case OP_STGVARI: { 165 case OP_STGVARI: {
155 u8 dst = instruction.dst; 166 sz dst = instruction.dst;
156 u8 src = instruction.a; 167 sz src = instruction.a;
157 Variable var = vm->main->vars[dst]; 168 Variable var = vm->chunks[0]->fun.vars[dst];
158 s64 *stack = (s64 *)&vm->stack[var.offset]; 169 s64 *stack = (s64 *)&vm->stack[var.offset];
159 *stack = vm->chunk->constants[src].i; 170 *stack = vm->current_chunk->constants[src].i;
160 } break; 171 } break;
161 case OP_LDGVAR: { 172 case OP_LDGVAR: {
162 u8 dst = instruction.dst; 173 sz dst = instruction.dst;
163 u8 src = instruction.a; 174 sz src = instruction.a;
164 Variable var = vm->main->vars[src]; 175 Variable var = vm->chunks[0]->fun.vars[src];
165 s64 *stack = (s64 *)&vm->stack[var.offset]; 176 s64 *stack = (s64 *)&vm->stack[var.offset];
166 vm->regs[dst].i = *stack; 177 vm->regs[dst].i = *stack;
167 } break; 178 } break;
168 case OP_LDGADDR: { 179 case OP_LDGADDR: {
169 u8 dst = instruction.dst; 180 sz dst = instruction.dst;
170 u8 src = instruction.a; 181 sz src = instruction.a;
171 Variable var = vm->main->vars[src]; 182 Variable var = vm->chunks[0]->fun.vars[src];
172 s64 *stack = (s64 *)&vm->stack[var.offset]; 183 s64 *stack = (s64 *)&vm->stack[var.offset];
173 vm->regs[dst].ptr = (ptrsize)stack; 184 vm->regs[dst].ptr = (ptrsize)stack;
174 } break; 185 } break;
175 case OP_STLVAR: { 186 case OP_STLVAR: {
176 u8 dst = instruction.dst; 187 sz dst = instruction.dst;
177 u8 src = instruction.a; 188 sz src = instruction.a;
178 Variable var = vm->chunk->vars[dst]; 189 Variable var = vm->current_chunk->fun.vars[dst];
179 vm->fp[var.offset / 8] = vm->regs[src].i; 190 vm->fp[var.offset / 8] = vm->regs[src].i;
180 } break; 191 } break;
181 case OP_STLVARI: { 192 case OP_STLVARI: {
182 u8 dst = instruction.dst; 193 sz dst = instruction.dst;
183 u8 src = instruction.a; 194 sz src = instruction.a;
184 Variable var = vm->chunk->vars[dst]; 195 Variable var = vm->current_chunk->fun.vars[dst];
185 vm->fp[var.offset / 8] = vm->chunk->constants[src].i; 196 vm->fp[var.offset / 8] = vm->current_chunk->constants[src].i;
186 } break; 197 } break;
187 case OP_LDLVAR: { 198 case OP_LDLVAR: {
188 u8 dst = instruction.dst; 199 sz dst = instruction.dst;
189 u8 src = instruction.a; 200 sz src = instruction.a;
190 Variable var = vm->chunk->vars[src]; 201 Variable var = vm->current_chunk->fun.vars[src];
191 vm->regs[dst].i = vm->fp[var.offset / 8]; 202 vm->regs[dst].i = vm->fp[var.offset / 8];
192 } break; 203 } break;
193 case OP_LDLADDR: { 204 case OP_LDLADDR: {
194 u8 dst = instruction.dst; 205 sz dst = instruction.dst;
195 u8 src = instruction.a; 206 sz src = instruction.a;
196 Variable var = vm->chunk->vars[src]; 207 Variable var = vm->current_chunk->fun.vars[src];
197 vm->regs[dst].i = (ptrsize)&vm->fp[var.offset / 8]; 208 vm->regs[dst].ptr = (ptrsize)&vm->fp[var.offset / 8];
198 } break; 209 } break;
199 case OP_LDSTR: { 210 case OP_LDSTR: {
200 u8 dst = instruction.dst; 211 sz dst = instruction.dst;
201 u8 src = instruction.a; 212 sz src = instruction.a;
202 Str *str = &vm->chunk->strings[src]; 213 Str *str = &vm->current_chunk->strings[src];
203 vm->regs[dst].ptr = (ptrsize)str; 214 vm->regs[dst].ptr = (ptrsize)str;
204 } break; 215 } break;
216 case OP_ST64K: {
217 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
218 *addr = vm->regs[instruction.dst].i & 0xffffffffffffffff;
219 } break;
205 case OP_ST64I: { 220 case OP_ST64I: {
206 sz value = vm->regs[instruction.dst].ptr; 221 sz value = vm->regs[instruction.dst].ptr;
207 s64 *addr = (s64 *)vm->regs[instruction.a].ptr; 222 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
208 sz offset = vm->chunk->constants[instruction.b].i; 223 sz offset = vm->current_chunk->constants[instruction.b].i;
209 addr[offset] = value; 224 addr[offset] = value & 0xffffffffffffffff;
210 } break; 225 } break;
211 case OP_ST64: { 226 case OP_ST64: {
212 sz value = vm->regs[instruction.dst].i; 227 sz value = vm->regs[instruction.dst].i;
213 s64 *addr = (s64 *)vm->regs[instruction.a].ptr; 228 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
214 sz offset = vm->regs[instruction.b].i; 229 sz offset = vm->regs[instruction.b].i;
215 addr[offset] = value; 230 addr[offset] = value & 0xffffffffffffffff;
231 } break;
232 case OP_ST32K: {
233 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
234 *addr = vm->regs[instruction.dst].i & 0xffffffff;
235 } break;
236 case OP_ST32I: {
237 sz value = vm->regs[instruction.dst].ptr;
238 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
239 sz offset = vm->current_chunk->constants[instruction.b].i;
240 addr[offset] = value & 0xffffffff;
241 } break;
242 case OP_ST32: {
243 sz value = vm->regs[instruction.dst].i;
244 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
245 sz offset = vm->regs[instruction.b].i;
246 addr[offset] = value & 0xffffffff;
247 } break;
248 case OP_ST16K: {
249 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
250 *addr = vm->regs[instruction.dst].i & 0xffff;
251 } break;
252 case OP_ST16I: {
253 sz value = vm->regs[instruction.dst].ptr;
254 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
255 sz offset = vm->current_chunk->constants[instruction.b].i;
256 addr[offset] = value & 0xffff;
257 } break;
258 case OP_ST16: {
259 sz value = vm->regs[instruction.dst].i;
260 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
261 sz offset = vm->regs[instruction.b].i;
262 addr[offset] = value & 0xffff;
263 } break;
264 case OP_ST8K: {
265 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
266 *addr = vm->regs[instruction.dst].i & 0xff;
267 } break;
268 case OP_ST8I: {
269 sz value = vm->regs[instruction.dst].ptr;
270 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
271 sz offset = vm->current_chunk->constants[instruction.b].i;
272 addr[offset] = value & 0xff;
273 } break;
274 case OP_ST8: {
275 sz value = vm->regs[instruction.dst].i;
276 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
277 sz offset = vm->regs[instruction.b].i;
278 addr[offset] = value & 0xff;
279 } break;
280 case OP_LD64K: {
281 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
282 vm->regs[instruction.dst].i = *addr & 0xffffffffffffffff;
216 } break; 283 } break;
217 case OP_LD64I: { 284 case OP_LD64I: {
218 s64 *addr = (s64 *)vm->regs[instruction.a].ptr; 285 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
219 sz offset = vm->chunk->constants[instruction.b].i; 286 sz offset = vm->current_chunk->constants[instruction.b].i;
220 vm->regs[instruction.dst].i = addr[offset]; 287 vm->regs[instruction.dst].i = addr[offset] & 0xffffffffffffffff;
221 } break; 288 } break;
222 case OP_LD64: { 289 case OP_LD64: {
223 s64 *addr = (s64 *)vm->regs[instruction.a].ptr; 290 s64 *addr = (s64 *)vm->regs[instruction.a].ptr;
224 sz offset = vm->regs[instruction.b].i; 291 sz offset = vm->regs[instruction.b].i;
225 vm->regs[instruction.dst].i = addr[offset]; 292 vm->regs[instruction.dst].i = addr[offset] & 0xffffffffffffffff;
293 } break;
294 case OP_LD32K: {
295 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
296 vm->regs[instruction.dst].i = *addr & 0xffffffff;
297 } break;
298 case OP_LD32I: {
299 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
300 sz offset = vm->current_chunk->constants[instruction.b].i;
301 vm->regs[instruction.dst].i = addr[offset] & 0xffffffff;
302 } break;
303 case OP_LD32: {
304 s32 *addr = (s32 *)vm->regs[instruction.a].ptr;
305 sz offset = vm->regs[instruction.b].i;
306 vm->regs[instruction.dst].i = addr[offset] & 0xffffffff;
307 } break;
308 case OP_LD16K: {
309 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
310 vm->regs[instruction.dst].i = *addr & 0xffff;
311 } break;
312 case OP_LD16I: {
313 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
314 sz offset = vm->current_chunk->constants[instruction.b].i;
315 vm->regs[instruction.dst].i = addr[offset] & 0xffff;
316 } break;
317 case OP_LD16: {
318 s16 *addr = (s16 *)vm->regs[instruction.a].ptr;
319 sz offset = vm->regs[instruction.b].i;
320 vm->regs[instruction.dst].i = addr[offset] & 0xffff;
321 } break;
322 case OP_LD8K: {
323 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
324 vm->regs[instruction.dst].i = *addr & 0xff;
325 } break;
326 case OP_LD8I: {
327 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
328 sz offset = vm->current_chunk->constants[instruction.b].i;
329 vm->regs[instruction.dst].i = addr[offset] & 0xff;
330 } break;
331 case OP_LD8: {
332 s8 *addr = (s8 *)vm->regs[instruction.a].ptr;
333 sz offset = vm->regs[instruction.b].i;
334 vm->regs[instruction.dst].i = addr[offset] & 0xff;
226 } break; 335 } break;
227 case OP_JMP: { 336 case OP_JMP: {
228 u8 dst = instruction.dst; 337 sz dst = instruction.dst;
229 sz pos = intintmap_lookup(&vm->chunk->labels, dst)->val; 338 sz pos = intintmap_lookup(&vm->current_chunk->labels, dst)->val;
230 vm->ip = vm->chunk->code + pos; 339 vm->ip = vm->current_chunk->code + pos;
231 } break; 340 } break;
232 case OP_JMPFI: { 341 case OP_JMPFI: {
233 u8 dst = instruction.dst; 342 sz dst = instruction.dst;
234 sz pos = intintmap_lookup(&vm->chunk->labels, dst)->val; 343 sz pos = intintmap_lookup(&vm->current_chunk->labels, dst)->val;
235 bool cond = vm->chunk->constants[instruction.a].i; 344 bool cond = vm->current_chunk->constants[instruction.a].i;
236 if (!cond) { 345 if (!cond) {
237 vm->ip = vm->chunk->code + pos; 346 vm->ip = vm->current_chunk->code + pos;
238 } 347 }
239 } break; 348 } break;
240 case OP_JMPTI: { 349 case OP_JMPTI: {
241 u8 dst = instruction.dst; 350 sz dst = instruction.dst;
242 sz pos = intintmap_lookup(&vm->chunk->labels, dst)->val; 351 sz pos = intintmap_lookup(&vm->current_chunk->labels, dst)->val;
243 bool cond = vm->chunk->constants[instruction.a].i; 352 bool cond = vm->current_chunk->constants[instruction.a].i;
244 if (cond) { 353 if (cond) {
245 vm->ip = vm->chunk->code + pos; 354 vm->ip = vm->current_chunk->code + pos;
246 } 355 }
247 } break; 356 } break;
248 case OP_JMPF: { 357 case OP_JMPF: {
249 u8 dst = instruction.dst; 358 sz dst = instruction.dst;
250 sz pos = intintmap_lookup(&vm->chunk->labels, dst)->val; 359 sz pos = intintmap_lookup(&vm->current_chunk->labels, dst)->val;
251 bool cond = vm->regs[instruction.a].i; 360 bool cond = vm->regs[instruction.a].i;
252 if (!cond) { 361 if (!cond) {
253 vm->ip = vm->chunk->code + pos; 362 vm->ip = vm->current_chunk->code + pos;
254 } 363 }
255 } break; 364 } break;
256 case OP_JMPT: { 365 case OP_JMPT: {
257 u8 dst = instruction.dst; 366 sz dst = instruction.dst;
258 sz pos = intintmap_lookup(&vm->chunk->labels, dst)->val; 367 sz pos = intintmap_lookup(&vm->current_chunk->labels, dst)->val;
259 bool cond = vm->regs[instruction.a].i; 368 bool cond = vm->regs[instruction.a].i;
260 if (cond) { 369 if (cond) {
261 vm->ip = vm->chunk->code + pos; 370 vm->ip = vm->current_chunk->code + pos;
262 } 371 }
263 } break; 372 } break;
264 case OP_MOV64: { 373 case OP_MOV64: {
265 u8 dst = instruction.dst; 374 sz dst = instruction.dst;
266 u8 src = instruction.a; 375 sz src = instruction.a;
267 vm->regs[dst] = vm->regs[src]; 376 vm->regs[dst] = vm->regs[src];
268 } break; 377 } break;
269 case OP_MOV32: { 378 case OP_MOV32: {
270 u8 dst = instruction.dst; 379 sz dst = instruction.dst;
271 u8 src = instruction.a; 380 sz src = instruction.a;
272 vm->regs[dst].i = vm->regs[src].i & 0xFFFFFFFF; 381 vm->regs[dst].i = vm->regs[src].i & 0xFFFFFFFF;
273 } break; 382 } break;
274 case OP_MOV16: { 383 case OP_MOV16: {
275 u8 dst = instruction.dst; 384 sz dst = instruction.dst;
276 u8 src = instruction.a; 385 sz src = instruction.a;
277 vm->regs[dst].i = vm->regs[src].i & 0xFFFF; 386 vm->regs[dst].i = vm->regs[src].i & 0xFFFF;
278 } break; 387 } break;
279 case OP_MOV8: { 388 case OP_MOV8: {
280 u8 dst = instruction.dst; 389 sz dst = instruction.dst;
281 u8 src = instruction.a; 390 sz src = instruction.a;
282 vm->regs[dst].i = vm->regs[src].i & 0xFF; 391 vm->regs[dst].i = vm->regs[src].i & 0xFF;
283 } break; 392 } break;
393 case OP_MOV64I: {
394 sz dst = instruction.dst;
395 sz src = instruction.a;
396 vm->regs[dst] = vm->current_chunk->constants[src];
397 } break;
398 case OP_MOV32I: {
399 sz dst = instruction.dst;
400 sz src = instruction.a;
401 vm->regs[dst].i =
402 vm->current_chunk->constants[src].i & 0xFFFFFFFF;
403 } break;
404 case OP_MOV16I: {
405 sz dst = instruction.dst;
406 sz src = instruction.a;
407 vm->regs[dst].i = vm->current_chunk->constants[src].i & 0xFFFF;
408 } break;
409 case OP_MOV8I: {
410 sz dst = instruction.dst;
411 sz src = instruction.a;
412 vm->regs[dst].i = vm->current_chunk->constants[src].i & 0xFF;
413 } break;
414 case OP_PRINTS8: {
415 sz idx = instruction.dst;
416 print("%d", vm->regs[idx].i & 0xFF);
417 } break;
418 case OP_PRINTS16: {
419 sz idx = instruction.dst;
420 print("%d", vm->regs[idx].i & 0xFFFF);
421 } break;
422 case OP_PRINTS32: {
423 sz idx = instruction.dst;
424 print("%d", vm->regs[idx].i & 0xFFFFFFFF);
425 } break;
284 case OP_PRINTS64: { 426 case OP_PRINTS64: {
285 u8 idx = instruction.dst; 427 sz idx = instruction.dst;
286 print("%d", vm->regs[idx].i); 428 print("%d", vm->regs[idx].i);
287 } break; 429 } break;
430 case OP_PRINTU8: {
431 sz idx = instruction.dst;
432 print("%x", vm->regs[idx].u & 0xFF);
433 } break;
434 case OP_PRINTU16: {
435 sz idx = instruction.dst;
436 print("%x", vm->regs[idx].u & 0xFFFF);
437 } break;
438 case OP_PRINTU32: {
439 sz idx = instruction.dst;
440 print("%x", vm->regs[idx].u & 0xFFFFFFFF);
441 } break;
442 case OP_PRINTU64: {
443 sz idx = instruction.dst;
444 print("%x", vm->regs[idx].u);
445 } break;
446 case OP_PRINTS8I: {
447 sz idx = instruction.dst;
448 print("%d", vm->current_chunk->constants[idx].i & 0xFF);
449 } break;
450 case OP_PRINTS16I: {
451 sz idx = instruction.dst;
452 print("%d", vm->current_chunk->constants[idx].i & 0xFFFF);
453 } break;
454 case OP_PRINTS32I: {
455 sz idx = instruction.dst;
456 print("%d", vm->current_chunk->constants[idx].i & 0xFFFFFFFF);
457 } break;
288 case OP_PRINTS64I: { 458 case OP_PRINTS64I: {
289 u8 idx = instruction.dst; 459 sz idx = instruction.dst;
290 print("%d", vm->chunk->constants[idx].i); 460 print("%d", vm->current_chunk->constants[idx].i);
461 } break;
462 case OP_PRINTU8I: {
463 sz idx = instruction.dst;
464 print("%x", vm->current_chunk->constants[idx].u & 0xFF);
465 } break;
466 case OP_PRINTU16I: {
467 sz idx = instruction.dst;
468 print("%x", vm->current_chunk->constants[idx].u & 0xFFFF);
469 } break;
470 case OP_PRINTU32I: {
471 sz idx = instruction.dst;
472 print("%x", vm->current_chunk->constants[idx].u & 0xFFFFFFFF);
473 } break;
474 case OP_PRINTU64I: {
475 sz idx = instruction.dst;
476 print("%x", vm->current_chunk->constants[idx].u);
291 } break; 477 } break;
292 case OP_PRINTBOOL: { 478 case OP_PRINTBOOL: {
293 u8 idx = instruction.dst; 479 sz idx = instruction.dst;
294 bool val = vm->regs[idx].i; 480 bool val = vm->regs[idx].i;
295 if (val) { 481 if (val) {
296 print("true"); 482 print("true");
@@ -299,8 +485,8 @@ vm_run(VM *vm) {
299 } 485 }
300 } break; 486 } break;
301 case OP_PRINTBOOLI: { 487 case OP_PRINTBOOLI: {
302 u8 idx = instruction.dst; 488 sz idx = instruction.dst;
303 bool val = vm->chunk->constants[idx].i; 489 bool val = vm->current_chunk->constants[idx].i;
304 if (val) { 490 if (val) {
305 print("true"); 491 print("true");
306 } else { 492 } else {
@@ -308,25 +494,25 @@ vm_run(VM *vm) {
308 } 494 }
309 } break; 495 } break;
310 case OP_PRINTF64: { 496 case OP_PRINTF64: {
311 u8 idx = instruction.dst; 497 sz idx = instruction.dst;
312 printf("%f", vm->regs[idx].f); 498 printf("%f", vm->regs[idx].f);
313 } break; 499 } break;
314 case OP_PRINTF64I: { 500 case OP_PRINTF64I: {
315 u8 idx = instruction.dst; 501 sz idx = instruction.dst;
316 printf("%f", vm->chunk->constants[idx].f); 502 printf("%f", vm->current_chunk->constants[idx].f);
317 } break; 503 } break;
318 case OP_PRINTSTR: { 504 case OP_PRINTSTR: {
319 u8 idx = instruction.dst; 505 sz idx = instruction.dst;
320 Str *string = (Str *)vm->regs[idx].ptr; 506 Str *string = (Str *)vm->regs[idx].ptr;
321 print("%s", *string); 507 print("%s", *string);
322 } break; 508 } break;
323 case OP_PRINTSTRI: { 509 case OP_PRINTSTRI: {
324 u8 idx = instruction.dst; 510 sz idx = instruction.dst;
325 Str string = vm->chunk->strings[idx]; 511 Str string = vm->current_chunk->strings[idx];
326 print("%s", string); 512 print("%s", string);
327 } break; 513 } break;
328 case OP_RESERVE: { 514 case OP_RESERVE: {
329 sz offset = vm->chunk->constants[instruction.dst].i; 515 sz offset = vm->current_chunk->constants[instruction.dst].i;
330 vm->sp += offset; 516 vm->sp += offset;
331 } break; 517 } break;
332 case OP_PUSH: { 518 case OP_PUSH: {
@@ -336,7 +522,7 @@ vm_run(VM *vm) {
336 vm->sp += sizeof(ptrsize); 522 vm->sp += sizeof(ptrsize);
337 } break; 523 } break;
338 case OP_PUSHI: { 524 case OP_PUSHI: {
339 sz val = vm->chunk->constants[instruction.dst].i; 525 sz val = vm->current_chunk->constants[instruction.dst].i;
340 u64 *p = (u64 *)vm->sp; 526 u64 *p = (u64 *)vm->sp;
341 *p = val; 527 *p = val;
342 vm->sp += sizeof(ptrsize); 528 vm->sp += sizeof(ptrsize);
@@ -351,23 +537,24 @@ vm_run(VM *vm) {
351 vm->fp[-1] = val; 537 vm->fp[-1] = val;
352 } break; 538 } break;
353 case OP_PUTRETI: { 539 case OP_PUTRETI: {
354 sz val = vm->chunk->constants[instruction.dst].i; 540 sz val = vm->current_chunk->constants[instruction.dst].i;
355 vm->fp[-1] = val; 541 vm->fp[-1] = val;
356 } break; 542 } break;
357 case OP_CALL: { 543 case OP_CALL: {
358 u8 dst = instruction.dst; 544 sz dst = instruction.dst;
359 Chunk *func = vm->main->functions[dst]; 545 Chunk *func = vm->chunks[dst];
360 546
361 ptrsize chunk_addr = (ptrsize)vm->chunk; 547 ptrsize chunk_addr = (ptrsize)vm->current_chunk;
362 ptrsize ip_addr = (ptrsize)vm->ip; 548 ptrsize ip_addr = (ptrsize)vm->ip;
363 ptrsize reg_addr = (ptrsize)vm->regs; 549 ptrsize reg_addr = (ptrsize)vm->regs;
364 ptrsize old_fp = (ptrsize)vm->fp; 550 ptrsize old_fp = (ptrsize)vm->fp;
365 551
366 // Allocate space for the locals. 552 // Allocate space for the locals.
367 memset(vm->sp, 0, func->var_off - func->param_off); 553 memset(vm->sp, 0,
368 vm->fp = (u64 *)(vm->sp - func->param_off); 554 func->fun.var_offset - func->fun.param_offset);
369 vm->sp += func->var_off - func->param_off; 555 vm->fp = (u64 *)(vm->sp - func->fun.param_offset);
370 vm->chunk = func; 556 vm->sp += func->fun.var_offset - func->fun.param_offset;
557 vm->current_chunk = func;
371 vm->ip = func->code; 558 vm->ip = func->code;
372 vm->regs = (Constant *)vm->sp; 559 vm->regs = (Constant *)vm->sp;
373 560
@@ -382,6 +569,49 @@ vm_run(VM *vm) {
382 p[3] = old_fp; 569 p[3] = old_fp;
383 vm->sp += sizeof(ptrsize) * 4; 570 vm->sp += sizeof(ptrsize) * 4;
384 } break; 571 } break;
572 case OP_RECUR_SELF: {
573 memset(vm->fp + vm->current_chunk->fun.param_offset, 0,
574 vm->current_chunk->fun.var_offset);
575 vm->ip = vm->current_chunk->code;
576 } break;
577 case OP_RECUR: {
578 u64 *p = (u64 *)vm->sp;
579 ptrsize chunk_addr = p[-4];
580 ptrsize ip_addr = p[-3];
581 ptrsize reg_addr = p[-2];
582 ptrsize old_fp = p[-1];
583 vm->sp -= sizeof(ptrsize) * 4;
584
585 // Deallocate registers.
586 vm->sp -= sizeof(Constant) * vm->current_chunk->reg_idx;
587
588 // Deallocate non param locals.
589 vm->sp -= vm->current_chunk->fun.var_offset;
590 vm->sp += vm->current_chunk->fun.param_offset;
591
592 // Prepare next function.
593 sz dst = instruction.dst;
594 Chunk *func = vm->chunks[dst];
595
596 // Allocate space for locals.
597 memset(vm->sp, 0,
598 func->fun.var_offset - func->fun.param_offset);
599 vm->sp += func->fun.var_offset - func->fun.param_offset;
600 vm->current_chunk = func;
601 vm->ip = func->code;
602 vm->regs = (Constant *)vm->sp;
603
604 // Allocate registers.
605 vm->sp += sizeof(Constant) * func->reg_idx;
606
607 // Restore return information.
608 p = (u64 *)vm->sp;
609 p[0] = chunk_addr;
610 p[1] = ip_addr;
611 p[2] = reg_addr;
612 p[3] = old_fp;
613 vm->sp += sizeof(ptrsize) * 4;
614 } break;
385 case OP_RET: { 615 case OP_RET: {
386 u64 *p = (u64 *)vm->sp; 616 u64 *p = (u64 *)vm->sp;
387 ptrsize chunk_addr = p[-4]; 617 ptrsize chunk_addr = p[-4];
@@ -391,25 +621,37 @@ vm_run(VM *vm) {
391 vm->sp -= sizeof(ptrsize) * 4; 621 vm->sp -= sizeof(ptrsize) * 4;
392 622
393 // Deallocate registers. 623 // Deallocate registers.
394 vm->sp -= sizeof(Constant) * vm->chunk->reg_idx; 624 vm->sp -= sizeof(Constant) * vm->current_chunk->reg_idx;
395 625
396 // Deallocate locals. 626 // Deallocate locals.
397 vm->sp -= vm->chunk->var_off; 627 vm->sp -= vm->current_chunk->fun.var_offset;
398 628
399 // Restore previous activation record. 629 // Restore previous activation record.
400 vm->regs = (Constant *)reg_addr; 630 vm->regs = (Constant *)reg_addr;
401 vm->ip = (Instruction *)ip_addr; 631 vm->ip = (Instruction *)ip_addr;
402 vm->chunk = (Chunk *)chunk_addr; 632 vm->current_chunk = (Chunk *)chunk_addr;
403 vm->fp = (u64 *)old_fp; 633 vm->fp = (u64 *)old_fp;
404 } break; 634 } break;
635 case OP_MEMCPY: {
636 u8 *dst = (u8 *)vm->regs[instruction.dst].ptr;
637 u8 *src = (u8 *)vm->regs[instruction.a].ptr;
638 sz n_bytes = vm->regs[instruction.b].i;
639 memcpy(dst, src, n_bytes);
640 } break;
641 case OP_MEMCPYI: {
642 u8 *dst = (u8 *)vm->regs[instruction.dst].ptr;
643 u8 *src = (u8 *)vm->regs[instruction.a].ptr;
644 sz n_bytes = vm->current_chunk->constants[instruction.b].i;
645 memcpy(dst, src, n_bytes);
646 } break;
405 case OP_HALT: { 647 case OP_HALT: {
406 println("VM halt..."); 648 println("VM halt...");
407 if (instruction.a != 0) { 649 if (instruction.a != 0) {
408 println("Result:"); 650 println("Result:");
409 Constant result = vm->regs[instruction.dst]; 651 Constant result = vm->regs[instruction.dst];
410 printf("\tint -> %lld\n", result.i); 652 printf("\tint -> %ld\n", result.i);
411 printf("\tfloat -> %.10e\n", result.f); 653 printf("\tfloat -> %.10e\n", result.f);
412 printf("\thex -> %llx\n", (u64)result.i); 654 printf("\thex -> %lx\n", (u64)result.i);
413 println("\tbinary -> %b", result.i); 655 println("\tbinary -> %b", result.i);
414 } 656 }
415 return; 657 return;