#ifndef BDL_COMPILER_H #define BDL_COMPILER_H #include "chunk.h" #include "lexer.h" #define MAX_DEPTH 1024 typedef struct Scope { StringView *params; StringView *locals; Token *captured; } Scope; typedef struct Compiler { Token *tokens; size_t current; size_t scope_depth; Scope scopes[MAX_DEPTH]; } Compiler; // Mimics the functionality in the Scanner functions, but for entire tokens. Token next_token(Compiler *compiler); Token peek_token(const Compiler *compiler); bool has_next_token(const Compiler *compiler); // Scope initialization/exit. void enter_scope(Compiler *compiler); void exit_scope(Compiler *compiler); void enter_scope(Compiler *compiler) { Scope *scope = &compiler->scopes[compiler->scope_depth++]; array_init(scope->params, 0); array_init(scope->locals, 0); array_init(scope->captured, 0); } void exit_scope(Compiler *compiler) { Scope *scope = &compiler->scopes[--compiler->scope_depth]; array_free(scope->params); array_free(scope->locals); array_free(scope->captured); } Scope * get_current_scope(Compiler *compiler) { return &compiler->scopes[compiler->scope_depth - 1]; } Chunk * compile(Token *tokens); Token peek_token(const Compiler *compiler) { return compiler->tokens[compiler->current]; } Token next_token(Compiler *compiler) { return compiler->tokens[compiler->current++]; } bool has_next_token(const Compiler *compiler) { return compiler->current < array_size(compiler->tokens); } ssize_t find_local_index(Scope *scope, Token tok) { for (size_t i = 0; i < array_size(scope->locals); i++) { if (sv_equal(&tok.value, &scope->locals[i])) { return i; } } return -1; } ssize_t find_captued_index(Scope *scope, Token tok) { for (size_t i = 0; i < array_size(scope->captured); i++) { if (sv_equal(&tok.value, &scope->captured[i].value)) { return i; } } return -1; } void emit_constant(Chunk *chunk, Token tok, Object obj) { size_t prev_size = array_size(chunk->constants); size_t num_idx = add_constant(chunk, obj); add_code(chunk, OP_CONSTANT, tok.line, tok.column); add_code(chunk, num_idx, tok.line, tok.column); // If the non value constant was already present we need to properly free // the memory from the object given to this function. if (prev_size == array_size(chunk->constants)) { object_free(&obj); } } size_t emit_jump(Chunk *chunk, Token tok, Ops op, ssize_t offset) { add_code(chunk, op, tok.line, tok.column); add_code(chunk, ((u16)offset >> 8) & 0xFF, tok.line, tok.column); add_code(chunk, ((u16)offset) & 0xFF, tok.line, tok.column); return array_size(chunk->code) - 2; } void patch_jump(Chunk *chunk, ssize_t offset) { ssize_t jump = array_size(chunk->code) - offset - 2; assert((u16)jump <= UINT16_MAX && "error: jump is too long"); chunk->code[offset] = ((u16)jump >> 8) & 0xFF; chunk->code[offset + 1] = ((u16)jump) & 0xFF; } void parse_fixnum(Chunk *chunk, Token tok) { ssize_t num = 0; int sign = 1; for (size_t i = 0; i < tok.value.n; i++) { char c = tok.value.start[i]; if (c == '-') { sign = -1; continue; } num = num * 10 + (c - '0'); } emit_constant(chunk, tok, FIXNUM_VAL(num * sign)); } void parse_symbol(Chunk *chunk, Compiler *compiler, Token tok) { if (compiler->scope_depth > 1) { Scope *current_scope = get_current_scope(compiler); ssize_t idx = -1; // Check if the variable was already captured. idx = find_captued_index(current_scope, tok); if (idx >= 0) { emit_constant(chunk, tok, FIXNUM_VAL(idx)); add_code(chunk, OP_CAPTURED, tok.line, tok.column); return; } // Check current scope locals. If we find it, emit OP_LOCAL. idx = find_local_index(current_scope, tok); if (idx >= 0) { emit_constant(chunk, tok, FIXNUM_VAL(idx)); add_code(chunk, OP_LOCAL, tok.line, tok.column); return; } // Descend scopes, if we find the symbol at a different depth, // we need to capture the variable. size_t depth = compiler->scope_depth - 2; while (depth > 0) { Scope *scope = &compiler->scopes[depth]; idx = find_local_index(scope, tok); if (idx >= 0) { // Put captured variable on stack. ssize_t captured_idx = array_size(current_scope->captured); emit_constant(chunk, tok, FIXNUM_VAL(captured_idx)); add_code(chunk, OP_CAPTURED, tok.line, tok.column); array_push(current_scope->captured, tok); return; } depth--; } // TODO: Capture globals? } Object obj = make_symbol(tok.value); emit_constant(chunk, tok, obj); add_code(chunk, OP_GET, tok.line, tok.column); } void parse_tree(Chunk *chunk, Compiler *compiler); void compile_list_binary_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); if (n <= 1) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } break; } parse_tree(chunk, compiler); n++; } emit_constant(chunk, start, FIXNUM_VAL(n)); add_code(chunk, op, start.line, start.column); } void compile_list_unary_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); if (n == 0) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); } return; } parse_tree(chunk, compiler); add_code(chunk, op, start.line, start.column); n++; if (n > 1) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); } } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); } void compile_list_simple_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { if (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); add_code(chunk, op, start.line, start.column); return; } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); } #define STR_ARRAY(ARR) (StringView){(ARR), array_size(ARR)} void compile_declare_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { Token name = next_token(compiler); if (name.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = start.line, .col = start.column, }); return; } if (compiler->scope_depth <= 1) { Object obj = make_symbol(name.value); emit_constant(chunk, name, obj); } else { if (op == OP_DEF) { op = OP_DEF_LOCAL; // Check if the local is already registered. Scope *scope = get_current_scope(compiler); ssize_t idx = find_local_index(scope, name); if (idx < 0) { array_push(scope->locals, name.value); idx = chunk->n_locals++; } emit_constant(chunk, name, FIXNUM_VAL(idx)); } else if (op == OP_SET) { // FIXME: This is fucking ugly. Scope *current_scope = get_current_scope(compiler); ssize_t idx = -1; // Check if the variable was already captured. idx = find_captued_index(current_scope, name); if (idx >= 0) { emit_constant(chunk, name, FIXNUM_VAL(idx)); op = OP_SET_CAPTURED; } else { idx = find_local_index(current_scope, name); if (idx >= 0) { emit_constant(chunk, name, FIXNUM_VAL(idx)); op = OP_SET_LOCAL; } else { size_t depth = compiler->scope_depth - 2; while (depth > 0) { Scope *scope = &compiler->scopes[depth]; idx = find_local_index(scope, name); if (idx >= 0) { op = OP_SET_CAPTURED; ssize_t captured_idx = array_size(current_scope->captured); emit_constant(chunk, name, FIXNUM_VAL(captured_idx)); array_push(current_scope->captured, name); break; } depth--; } if (idx < 0) { Object obj = make_symbol(name.value); emit_constant(chunk, name, obj); } } } } } // NOTE: We can have compiler support for preemptively finding if globals // exist or not. Token tok = peek_token(compiler); if (name.type == TOKEN_EOF || tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (name.type == TOKEN_RPAREN || tok.type == TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } parse_tree(chunk, compiler); if (peek_token(compiler).type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); add_code(chunk, op, start.line, start.column); } void compile_lambda(Chunk *chunk, Compiler *compiler, Token start, StringView name) { enter_scope(compiler); Chunk *proc_chunk = chunk_init(name); Object fun = make_lambda(proc_chunk); // Prepeare parameters. Token tok = next_token(compiler); if (tok.type == TOKEN_LPAREN){ while (has_next_token(compiler)) { Token tok = next_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { break; } if (tok.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.column, }); return; } // Check if parameters name already exists. Scope *scope = get_current_scope(compiler); ssize_t idx = find_local_index(scope, tok); if (idx >= 0) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_AMBIGUOUS_PARAMS, .line = tok.line, .col = tok.column, }); return; } array_push(scope->params, tok.value); array_push(scope->locals, tok.value); fun.closure->chunk->n_params++; fun.closure->chunk->n_locals++; } } else if (tok.type != TOKEN_NIL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.column, }); return; } // Compile body. while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); break; } parse_tree(fun.closure->chunk, compiler); } add_code(fun.closure->chunk, OP_RETURN, start.line, start.column); // Prepare closure value capture. Scope *scope = get_current_scope(compiler); size_t n_captured = array_size(scope->captured); if (n_captured > 0) { compiler->scope_depth--; for (size_t i = 0; i < n_captured; i++) { Token tok = scope->captured[i]; parse_symbol(chunk, compiler, tok); } // Number of captured values. emit_constant(chunk, tok, FIXNUM_VAL(n_captured)); // Push created lambda to stack. emit_constant(chunk, start, fun); // Emit capture local instruction. add_code(chunk, OP_CAPTURE_LOCAL, tok.line, tok.column); compiler->scope_depth++; } else { emit_constant(chunk, start, fun); } exit_scope(compiler); } void compile_fun_op(Chunk *chunk, Compiler *compiler, Token start) { Token name = peek_token(compiler); if (name.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = start.line, .col = start.column, }); return; } Object obj = make_symbol(name.value); emit_constant(chunk, name, obj); next_token(compiler); compile_lambda(chunk, compiler, start, name.value); add_code(chunk, OP_DEF, start.line, start.column); } void compile_call_op(Chunk *chunk, Compiler *compiler, Token start, Token name) { if (name.type == TOKEN_SYMBOL) { parse_symbol(chunk, compiler, name); } // Compile body. size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); break; } parse_tree(chunk, compiler); n++; } emit_constant(chunk, start, FIXNUM_VAL(n)); add_code(chunk, OP_CALL, start.line, start.column); } void compile_if_op(Chunk *chunk, Compiler *compiler, Token start) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } // Condition. parse_tree(chunk, compiler); size_t jmp_false = emit_jump(chunk, start, OP_JUMP_IF_FALSE, 0xFFFF); // True expression. parse_tree(chunk, compiler); // No second expression. if (peek_token(compiler).type == TOKEN_RPAREN) { patch_jump(chunk, jmp_false); next_token(compiler); return; } // False expression. size_t jmp_end = emit_jump(chunk, start, OP_JUMP, 0xFFFF); patch_jump(chunk, jmp_false); parse_tree(chunk, compiler); patch_jump(chunk, jmp_end); if (peek_token(compiler).type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); } void parse_list(Chunk *chunk, Compiler *compiler, Token start) { if (!has_next_token(compiler)) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } while (has_next_token(compiler)) { Token tok = next_token(compiler); if (tok.type == TOKEN_LPAREN) { parse_list(chunk, compiler, tok); } switch (tok.type) { case TOKEN_ADD: { compile_list_binary_op(chunk, compiler, start, OP_SUM); } break; case TOKEN_SUB: { compile_list_binary_op(chunk, compiler, start, OP_SUB); } break; case TOKEN_MUL: { compile_list_binary_op(chunk, compiler, start, OP_MUL); } break; case TOKEN_DIV: { compile_list_binary_op(chunk, compiler, start, OP_DIV); } break; case TOKEN_MOD: { compile_list_binary_op(chunk, compiler, start, OP_MOD); } break; case TOKEN_NOT: { compile_list_unary_op(chunk, compiler, start, OP_NOT); } break; case TOKEN_AND: { compile_list_binary_op(chunk, compiler, start, OP_AND); } break; case TOKEN_OR: { compile_list_binary_op(chunk, compiler, start, OP_OR); } break; case TOKEN_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_EQUAL); } break; case TOKEN_LESS: { compile_list_binary_op(chunk, compiler, start, OP_LESS); } break; case TOKEN_GREATER: { compile_list_binary_op(chunk, compiler, start, OP_GREATER); } break; case TOKEN_LESS_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_LESS_EQUAL); } break; case TOKEN_GREATER_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_GREATER_EQUAL); } break; case TOKEN_PRINT: { compile_list_unary_op(chunk, compiler, start, OP_PRINT); } break; case TOKEN_DISPLAY: { compile_list_unary_op(chunk, compiler, start, OP_DISPLAY); } break; case TOKEN_NEWLINE: { compile_list_simple_op(chunk, compiler, start, OP_NEWLINE); emit_constant(chunk, start, NIL_VAL); } break; case TOKEN_DEF: { compile_declare_op(chunk, compiler, start, OP_DEF); } break; case TOKEN_SET: { compile_declare_op(chunk, compiler, start, OP_SET); } break; case TOKEN_FUN: { compile_fun_op(chunk, compiler, start); } break; case TOKEN_LAMBDA: { compile_lambda(chunk, compiler, start, STRING("lambda")); } break; case TOKEN_IF: { compile_if_op(chunk, compiler, start); } break; default: { compile_call_op(chunk, compiler, start, tok); } break; } return; } } void parse_tree(Chunk *chunk, Compiler *compiler) { Token tok = next_token(compiler); if (errors_n != 0) { return; } switch (tok.type) { case TOKEN_FIXNUM: { parse_fixnum(chunk, tok); return ; } break; case TOKEN_TRUE: { emit_constant(chunk, tok, TRUE_VAL); return; } break; case TOKEN_FALSE: { emit_constant(chunk, tok, FALSE_VAL); return; } break; case TOKEN_RPAREN: { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.column, }); return; } break; case TOKEN_QUOTE: { // Object *base = make_pair(obj_quote, obj_nil); // base->cdr = make_pair(obj_nil, obj_nil); // push_root(base); // Object *next_obj = parse_tree(compiler); // if (next_obj == obj_err) { // return obj_err; // } // base->cdr->car = next_obj; // return base; return; } break; case TOKEN_LPAREN: { parse_list(chunk, compiler, tok); return; } break; case TOKEN_STRING: { Object obj = make_string(tok.value); emit_constant(chunk, tok, obj); return; } break; case TOKEN_SYMBOL: { parse_symbol(chunk, compiler, tok); return; } break; case TOKEN_NIL: { emit_constant(chunk, tok, NIL_VAL); return; } break; default: { break; } break; } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_EOF_REACHED, .line = tok.line, .col = tok.column, }); return; } Chunk * compile(Token *tokens) { Chunk *chunk = NULL; chunk = NEW_CHUNK("main"); Compiler compiler = (Compiler){ .tokens = tokens, .current = 0, .scope_depth = 0, }; enter_scope(&compiler); Token main_start = peek_token(&compiler); while (has_next_token(&compiler)) { Token start = peek_token(&compiler); parse_tree(chunk, &compiler); if (peek_token(&compiler).type == TOKEN_EOF) { break; } add_code(chunk, OP_DROP, start.line, start.column); } add_code(chunk, OP_RETURN, main_start.line, main_start.column); exit_scope(&compiler); return chunk; } #endif // BDL_COMPILER_H