#ifndef BDL_COMPILER_H #define BDL_COMPILER_H #include "chunk.h" #include "lexer.h" typedef struct Compiler { Token *tokens; size_t current; } Compiler; // Mimics the functionality in the Scanner functions, but for entire tokens. Token next_token(Compiler *compiler); Token peek_token(const Compiler *compiler); bool has_next_token(const Compiler *compiler); Chunk * compile(Token *tokens); Token peek_token(const Compiler *compiler) { return compiler->tokens[compiler->current]; } Token next_token(Compiler *compiler) { return compiler->tokens[compiler->current++]; } bool has_next_token(const Compiler *compiler) { return compiler->current < array_size(compiler->tokens); } ssize_t find_local_index(Chunk *chunk, Token tok) { // NOTE: This is dumb and potentially slow. for (size_t i = 0; i < array_size(chunk->locals); i++) { if (sv_equal(&tok.value, &chunk->locals[i])) { return i; } } return -1; } void emit_constant(Chunk *chunk, Token tok, Object obj) { size_t prev_size = array_size(chunk->constants); size_t num_idx = add_constant(chunk, obj); add_code(chunk, OP_CONSTANT, tok.line, tok.column); add_code(chunk, num_idx, tok.line, tok.column); // If the non value constant was already present we need to properly free // the memory from the object given to this function. if (prev_size == array_size(chunk->constants)) { object_free(obj); } } size_t emit_jump(Chunk *chunk, Token tok, Ops op, ssize_t offset) { add_code(chunk, op, tok.line, tok.column); add_code(chunk, ((u16)offset >> 8) & 0xFF, tok.line, tok.column); add_code(chunk, ((u16)offset) & 0xFF, tok.line, tok.column); return array_size(chunk->code) - 2; } void patch_jump(Chunk *chunk, ssize_t offset) { ssize_t jump = array_size(chunk->code) - offset - 2; assert((u16)jump <= UINT16_MAX && "error: jump is too long"); chunk->code[offset] = ((u16)jump >> 8) & 0xFF; chunk->code[offset + 1] = ((u16)jump) & 0xFF; } void parse_fixnum(Chunk *chunk, Token tok) { ssize_t num = 0; int sign = 1; for (size_t i = 0; i < tok.value.n; i++) { char c = tok.value.start[i]; if (c == '-') { sign = -1; continue; } num = num * 10 + (c - '0'); } emit_constant(chunk, tok, FIXNUM_VAL(num * sign)); } void parse_tree(Chunk *chunk, Compiler *compiler); void compile_list_binary_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); if (n <= 1) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } break; } parse_tree(chunk, compiler); n++; } emit_constant(chunk, start, FIXNUM_VAL(n)); add_code(chunk, op, start.line, start.column); } void compile_list_unary_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); if (n == 0) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); } return; } parse_tree(chunk, compiler); add_code(chunk, op, start.line, start.column); n++; if (n > 1) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); } } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); } void compile_list_simple_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { if (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); add_code(chunk, op, start.line, start.column); return; } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); } #define STR_ARRAY(ARR) (StringView){(ARR), array_size(ARR)} void compile_declare_op(Chunk *chunk, Compiler *compiler, Token start, Ops op) { Token name = next_token(compiler); if (name.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = start.line, .col = start.column, }); return; } // TODO: If we are inside a function and we are using OP_DEF, we just // declare the local variable directly in the stack. No need for symbols! // TODO: If we are inside a function and we are using OP_SET, check if the // local variable has been defined before, if not try to read from the // globals for setting. if (sv_equal(&STRING(""), &STR_ARRAY(chunk->name))) { Object obj = make_symbol(name.value); emit_constant(chunk, name, obj); } else { // TODO: only do this if we are defining! not setting. // Check if we already have the local ssize_t idx = find_local_index(chunk, name); if (idx < 0) { array_push(chunk->locals, name.value); idx = array_size(chunk->locals) - 1; } if (op == OP_DEF) { op = OP_DEF_LOCAL; } else if (op == OP_SET) { op = OP_SET_LOCAL; } emit_constant(chunk, name, FIXNUM_VAL(idx)); } Token tok = peek_token(compiler); if (name.type == TOKEN_EOF || tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (name.type == TOKEN_RPAREN || tok.type == TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } parse_tree(chunk, compiler); if (peek_token(compiler).type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); add_code(chunk, op, start.line, start.column); } void compile_lambda(Chunk *chunk, Compiler *compiler, Token start, StringView name) { Object fun = make_lambda(name); // Prepeare parameters. Token tok = next_token(compiler); if (tok.type == TOKEN_LPAREN){ while (has_next_token(compiler)) { Token tok = next_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { break; } if (tok.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.column, }); return; } // Check if parameters name already exists. ssize_t idx = find_local_index(fun.chunk, tok); if (idx >= 0) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_AMBIGUOUS_PARAMS, .line = tok.line, .col = tok.column, }); return; } array_push(fun.chunk->params, tok.value); array_push(fun.chunk->locals, tok.value); } } else if (tok.type != TOKEN_NIL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = tok.line, .col = tok.column, }); return; } // Compile body. while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); break; } parse_tree(fun.chunk, compiler); } add_code(fun.chunk, OP_RETURN, start.line, start.column); emit_constant(chunk, start, fun); } void compile_fun_op(Chunk *chunk, Compiler *compiler, Token start) { Token name = peek_token(compiler); if (name.type != TOKEN_SYMBOL) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_WRONG_ARG_TYPE, .line = start.line, .col = start.column, }); return; } Object obj = make_symbol(name.value); emit_constant(chunk, name, obj); next_token(compiler); compile_lambda(chunk, compiler, start, name.value); add_code(chunk, OP_DEF, start.line, start.column); } void compile_call_op(Chunk *chunk, Compiler *compiler, Token start, Token name) { // Compile body. size_t n = 0; while (has_next_token(compiler)) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { next_token(compiler); break; } parse_tree(chunk, compiler); n++; } if (name.type == TOKEN_SYMBOL) { Object obj = make_symbol(name.value); emit_constant(chunk, start, obj); add_code(chunk, OP_GET, start.line, start.column); } emit_constant(chunk, start, FIXNUM_VAL(n)); add_code(chunk, OP_CALL, start.line, start.column); } void compile_if_op(Chunk *chunk, Compiler *compiler, Token start) { Token tok = peek_token(compiler); if (tok.type == TOKEN_EOF) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } if (tok.type == TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_NOT_ENOUGH_ARGS, .line = start.line, .col = start.column, }); return; } // Condition. parse_tree(chunk, compiler); size_t jmp_false = emit_jump(chunk, start, OP_JUMP_IF_FALSE, 0xFFFF); // True expression. parse_tree(chunk, compiler); // No second expression. if (peek_token(compiler).type == TOKEN_RPAREN) { patch_jump(chunk, jmp_false); next_token(compiler); return; } // False expression. size_t jmp_end = emit_jump(chunk, start, OP_JUMP, 0xFFFF); patch_jump(chunk, jmp_false); parse_tree(chunk, compiler); patch_jump(chunk, jmp_end); if (peek_token(compiler).type != TOKEN_RPAREN) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_TOO_MANY_ARGS, .line = start.line, .col = start.column, }); return; } next_token(compiler); } void parse_list(Chunk *chunk, Compiler *compiler, Token start) { if (!has_next_token(compiler)) { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = start.line, .col = start.column, }); return; } while (has_next_token(compiler)) { Token tok = next_token(compiler); if (tok.type == TOKEN_LPAREN) { parse_list(chunk, compiler, tok); } switch (tok.type) { case TOKEN_ADD: { compile_list_binary_op(chunk, compiler, start, OP_SUM); } break; case TOKEN_SUB: { compile_list_binary_op(chunk, compiler, start, OP_SUB); } break; case TOKEN_MUL: { compile_list_binary_op(chunk, compiler, start, OP_MUL); } break; case TOKEN_DIV: { compile_list_binary_op(chunk, compiler, start, OP_DIV); } break; case TOKEN_MOD: { compile_list_binary_op(chunk, compiler, start, OP_MOD); } break; case TOKEN_NOT: { compile_list_unary_op(chunk, compiler, start, OP_NOT); } break; case TOKEN_AND: { compile_list_binary_op(chunk, compiler, start, OP_AND); } break; case TOKEN_OR: { compile_list_binary_op(chunk, compiler, start, OP_OR); } break; case TOKEN_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_EQUAL); } break; case TOKEN_LESS: { compile_list_binary_op(chunk, compiler, start, OP_LESS); } break; case TOKEN_GREATER: { compile_list_binary_op(chunk, compiler, start, OP_GREATER); } break; case TOKEN_LESS_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_LESS_EQUAL); } break; case TOKEN_GREATER_EQUAL: { compile_list_binary_op(chunk, compiler, start, OP_GREATER_EQUAL); } break; case TOKEN_PRINT: { compile_list_unary_op(chunk, compiler, start, OP_PRINT); } break; case TOKEN_DISPLAY: { compile_list_unary_op(chunk, compiler, start, OP_DISPLAY); } break; case TOKEN_NEWLINE: { compile_list_simple_op(chunk, compiler, start, OP_NEWLINE); } break; case TOKEN_DEF: { compile_declare_op(chunk, compiler, start, OP_DEF); } break; case TOKEN_SET: { compile_declare_op(chunk, compiler, start, OP_SET); } break; case TOKEN_FUN: { compile_fun_op(chunk, compiler, start); } break; case TOKEN_LAMBDA: { compile_lambda(chunk, compiler, start, STRING("lambda")); } break; case TOKEN_IF: { compile_if_op(chunk, compiler, start); } break; default: { compile_call_op(chunk, compiler, start, tok); } break; } return; } } void parse_tree(Chunk *chunk, Compiler *compiler) { Token tok = next_token(compiler); if (errors_n != 0) { return; } switch (tok.type) { case TOKEN_FIXNUM: { parse_fixnum(chunk, tok); return ; } break; case TOKEN_TRUE: { emit_constant(chunk, tok, TRUE_VAL); return; } break; case TOKEN_FALSE: { emit_constant(chunk, tok, FALSE_VAL); return; } break; case TOKEN_RPAREN: { error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_UNBALANCED_PAREN, .line = tok.line, .col = tok.column, }); return; } break; case TOKEN_QUOTE: { // Object *base = make_pair(obj_quote, obj_nil); // base->cdr = make_pair(obj_nil, obj_nil); // push_root(base); // Object *next_obj = parse_tree(compiler); // if (next_obj == obj_err) { // return obj_err; // } // base->cdr->car = next_obj; // return base; return; } break; case TOKEN_LPAREN: { parse_list(chunk, compiler, tok); return; } break; case TOKEN_STRING: { Object obj = make_string(tok.value); emit_constant(chunk, tok, obj); return; } break; case TOKEN_SYMBOL: { ssize_t idx = find_local_index(chunk, tok); if (idx < 0) { Object obj = make_symbol(tok.value); emit_constant(chunk, tok, obj); add_code(chunk, OP_GET, tok.line, tok.column); return; } add_code(chunk, OP_LOCAL, tok.line, tok.column); add_code(chunk, idx, tok.line, tok.column); return; } break; case TOKEN_NIL: { emit_constant(chunk, tok, NIL_VAL); return; } break; default: { break; } break; } error_push((Error){ .type = ERR_TYPE_COMPILER, .value = ERR_EOF_REACHED, .line = tok.line, .col = tok.column, }); return; } Chunk * compile(Token *tokens) { Chunk *chunk = NULL; chunk = NEW_CHUNK(""); Compiler compiler = (Compiler){ .tokens = tokens, .current = 0, }; Token main_start = peek_token(&compiler); while (has_next_token(&compiler)) { Token start = peek_token(&compiler); parse_tree(chunk, &compiler); if (peek_token(&compiler).type == TOKEN_EOF) { break; } add_code(chunk, OP_DROP, start.line, start.column); } add_code(chunk, OP_RETURN, main_start.line, main_start.column); return chunk; } #endif // BDL_COMPILER_H