From eeff5e273f22aa28e81ab080e9ffdce85ac394b8 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 22 Oct 2021 09:59:31 +0200 Subject: Prepare skeleton for bytecode interpreter --- src/bytecode/darray.h | 78 ++++++++++++++ src/bytecode/debug.h | 32 ++++++ src/bytecode/errors.c | 29 +++++ src/bytecode/errors.h | 38 +++++++ src/bytecode/lexer.c | 257 +++++++++++++++++++++++++++++++++++++++++++++ src/bytecode/lexer.h | 60 +++++++++++ src/bytecode/main.c | 197 ++++++++++++++++++++++++++++++++++ src/bytecode/ops.h | 8 ++ src/bytecode/read_line.c | 32 ++++++ src/bytecode/read_line.h | 10 ++ src/bytecode/string_view.c | 40 +++++++ src/bytecode/string_view.h | 21 ++++ src/bytecode/types.h | 30 ++++++ 13 files changed, 832 insertions(+) create mode 100644 src/bytecode/darray.h create mode 100644 src/bytecode/debug.h create mode 100644 src/bytecode/errors.c create mode 100644 src/bytecode/errors.h create mode 100644 src/bytecode/lexer.c create mode 100644 src/bytecode/lexer.h create mode 100644 src/bytecode/main.c create mode 100644 src/bytecode/ops.h create mode 100644 src/bytecode/read_line.c create mode 100644 src/bytecode/read_line.h create mode 100644 src/bytecode/string_view.c create mode 100644 src/bytecode/string_view.h create mode 100644 src/bytecode/types.h (limited to 'src/bytecode') diff --git a/src/bytecode/darray.h b/src/bytecode/darray.h new file mode 100644 index 0000000..db6234d --- /dev/null +++ b/src/bytecode/darray.h @@ -0,0 +1,78 @@ +#ifndef BDL_DARRAY_H +#define BDL_DARRAY_H + +#include + +typedef struct ArrayHeader { + size_t size; + size_t cap; +} ArrayHeader; + +// Header/Size/capacity accessors. +#define array_head(ARR) ((ArrayHeader *)((char *)(ARR) - sizeof(ArrayHeader))) +#define array_size(ARR) ((ARR) ? array_head(ARR)->size : 0) +#define array_cap(ARR) ((ARR) ? array_head(ARR)->cap : 0) + +// Initialize a dynamic array ARR with N elements. The initialization doesn't +// zero out the data, so thread carefully.. +#define array_init(ARR,N) ((ARR) = _array_reserve(N, sizeof(*(ARR)))) + +// Push a given element T to the dynamic array ARR. +#define array_push(ARR, T) \ + ((ARR) = _array_maybe_grow(ARR, sizeof(T)), \ + (ARR)[array_head(ARR)->size++] = (T)) + +// Return the last element of the array. Can be used to build stacks. +#define array_pop(ARR) (ARR)[--array_head(ARR)->size] + +// Insert N bytes from the SRC array into the ARR dynamic array. +#define array_insert(ARR, SRC, N) \ + ((ARR) = _array_insert(ARR, SRC, N, sizeof(*(ARR)))) + +// Free the memory from the original allocated position. +#define array_free(ARR) ((ARR) ? free(array_head(ARR)), (ARR) = NULL : 0) + +static inline void * +_array_reserve(size_t num_elem, size_t type_size) { + char *p = malloc(num_elem * type_size + sizeof(ArrayHeader)); + p += sizeof(ArrayHeader); + array_head(p)->size = 0; + array_head(p)->cap = num_elem; + return p; +} + +static inline void * +_array_maybe_grow(void *arr, size_t type_size) { + ArrayHeader *head = array_head(arr); + if (head->cap == head->size) { + if (head->cap == 0) { + head->cap++; + } else { + head->cap *= 2; + } + head = realloc(head, head->cap * type_size + sizeof(ArrayHeader)); + } + arr = (char *)head + sizeof(ArrayHeader); + return arr; +} + +static inline +char * _array_insert(char *arr, const char *src, size_t n_bytes, size_t type_size) { + ArrayHeader *head = array_head(arr); + size_t new_size = n_bytes + head->size; + if (new_size >= head->cap * type_size) { + if (head->cap == 0) { + head->cap = 1; + } + while (new_size >= head->cap * type_size) { + head->cap *= 2; + } + head = realloc(head, head->cap * type_size + sizeof(ArrayHeader)); + } + arr = (char *)head + sizeof(ArrayHeader); + memcpy((arr + head->size), src, n_bytes); + head->size = new_size; + return arr; +} + +#endif // BDL_DARRAY_H diff --git a/src/bytecode/debug.h b/src/bytecode/debug.h new file mode 100644 index 0000000..3d08d8f --- /dev/null +++ b/src/bytecode/debug.h @@ -0,0 +1,32 @@ +#ifndef BDL_DEBUG_H +#define BDL_DEBUG_H + +void disassemble_chunk(u8 *chunk, const char *name); +size_t disassemble_instruction(u8 *chunk, size_t offset); + +void +disassemble_chunk(u8 *chunk, const char *name) { + printf("== %s ==\n", name); + size_t offset = 0; + while (offset < array_size(chunk)) { + offset = disassemble_instruction(chunk, offset); + } +} + +size_t +disassemble_instruction(u8 *chunk, size_t offset) { + printf("%04ld ", offset); + u8 instruction = chunk[offset]; + switch (instruction) { + case OP_RETURN: { + printf("OP_RETURN\n"); + return offset + 1; + } break; + default: { + printf("Unknown OP: %d\n", instruction); + return offset + 1; + } break; + } +} + +#endif // BDL_DEBUG_H diff --git a/src/bytecode/errors.c b/src/bytecode/errors.c new file mode 100644 index 0000000..d957cfa --- /dev/null +++ b/src/bytecode/errors.c @@ -0,0 +1,29 @@ +#include "errors.h" + +static const char* error_msgs[] = { + [ERR_UNKNOWN] = "error: something unexpected happened", + [ERR_UNMATCHED_STRING] = "error: unmatched string delimiter", + [ERR_UNBALANCED_PAREN] = "error: unbalanced parentheses", + [ERR_NOT_IMPLEMENTED] = "error: not implemented", + [ERR_EOF_REACHED] = "error: EOF reached", + [ERR_UNKNOWN_TOKEN] = "error: unknown token", + [ERR_UNKNOWN_OBJ_TYPE] = "error: can't eval unknown object type", + [ERR_NOT_A_SYMBOL] = "error: object is not a symbol", + [ERR_SYMBOL_NOT_FOUND] = "error: symbol not found", + [ERR_OBJ_NOT_CALLABLE] = "error: object is not callable", + [ERR_NOT_ENOUGH_ARGS] = "error: not enough arguments", + [ERR_TOO_MANY_ARGS] = "error: too many arguments", + [ERR_WRONG_ARG_TYPE] = "error: wrong argument type", + [ERR_DIVISION_BY_ZERO] = "error: division by zero", +}; + +static Error errors[ERR_MAX_NUMBER]; +static size_t errors_n = 0; +static bool supress_errors = false; + +void +error_push(Error error) { + if (errors_n < ERR_MAX_NUMBER) { + errors[errors_n++] = error; + } +} diff --git a/src/bytecode/errors.h b/src/bytecode/errors.h new file mode 100644 index 0000000..7916f4a --- /dev/null +++ b/src/bytecode/errors.h @@ -0,0 +1,38 @@ +#ifndef BDL_ERRORS_H +#define BDL_ERRORS_H + +typedef enum ErrorType { + ERR_TYPE_LEXER, + ERR_TYPE_PARSER, + ERR_TYPE_RUNTIME, +} ErrorType; + +typedef enum ErrorValue { + ERR_UNKNOWN = 0, + ERR_UNMATCHED_STRING, + ERR_UNBALANCED_PAREN, + ERR_NOT_IMPLEMENTED, + ERR_EOF_REACHED, + ERR_UNKNOWN_TOKEN, + ERR_UNKNOWN_OBJ_TYPE, + ERR_NOT_A_SYMBOL, + ERR_SYMBOL_NOT_FOUND, + ERR_OBJ_NOT_CALLABLE, + ERR_NOT_ENOUGH_ARGS, + ERR_TOO_MANY_ARGS, + ERR_WRONG_ARG_TYPE, + ERR_DIVISION_BY_ZERO, +} ErrorValue; + +typedef struct Error { + ErrorType type; + ErrorValue value; + size_t line; + size_t col; +} Error; + +void error_push(Error error); + +#define ERR_MAX_NUMBER 16 + +#endif // BDL_ERRORS_H diff --git a/src/bytecode/lexer.c b/src/bytecode/lexer.c new file mode 100644 index 0000000..38ca37c --- /dev/null +++ b/src/bytecode/lexer.c @@ -0,0 +1,257 @@ +#include "lexer.h" + +void +print_token(Token tok) { + printf("LINE: %3ld COL: %3ld ", tok.line, tok.column); + switch (tok.type) { + case TOKEN_LPAREN: { + printf("TOKEN_LPAREN"); + } break; + case TOKEN_RPAREN: { + printf("TOKEN_RPAREN"); + } break; + case TOKEN_QUOTE: { + printf("TOKEN_QUOTE"); + } break; + case TOKEN_TRUE: { + printf("TOKEN_TRUE"); + } break; + case TOKEN_FALSE: { + printf("TOKEN_FALSE"); + } break; + case TOKEN_NIL: { + printf("TOKEN_NIL"); + } break; + case TOKEN_FIXNUM: { + printf("TOKEN_FIXNUM -> "); + sv_write(&tok.value, stdout); + } break; + case TOKEN_SYMBOL: { + printf("TOKEN_SYMBOL -> "); + sv_write(&tok.value, stdout); + } break; + case TOKEN_STRING: { + printf("TOKEN_STRING -> "); + sv_write(&tok.value, stdout); + } break; + case TOKEN_EOF: { + printf("TOKEN_EOF"); + } break; + case TOKEN_UNKNOWN: { + printf("TOKEN_UNKNOWN"); + } break; + } + printf("\n"); +} + +char +scan_next(Scanner *scanner) { + char c = sv_next(&scanner->current); + if (c == '\n') { + scanner->line_number++; + scanner->col_number = 1; + } else { + scanner->col_number++; + } + scanner->offset++; + return c; +} + +char +scan_peek(const Scanner *scanner) { + return sv_peek(&scanner->current); +} + +bool +scan_has_next(const Scanner *scanner) { + return scanner->current.n != 0; +} + +void +skip_whitespace(Scanner *scanner) { + while (scan_has_next(scanner)) { + char c = scan_peek(scanner); + switch (c) { + case ' ': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': { + scan_next(scanner); + } break; + default: { + return; + } break; + } + } +} + +bool +is_delimiter(char c) { + switch (c) { + case EOF: + case '\0': + case ';': + case '"': + case '\'': + case '(': + case ')': + case ' ': + case '\f': + case '\n': + case '\r': + case '\t': + case '\v': { + return true; + } break; + } + return false; +} + +TokenType +find_primitive_type(const StringView value) { + bool is_fixnum = true; + for (size_t i = 0; i < value.n; i++) { + char c = value.start[i]; + if (i == 0 && c == '-' && value.n > 1) { + continue; + } + if (!(c >= '0' && c <= '9')) { + is_fixnum = false; + break; + } + } + if (is_fixnum) { + return TOKEN_FIXNUM; + } + if (sv_equal(&value, &(StringView){"true", 4})) { + return TOKEN_TRUE; + } + if (sv_equal(&value, &(StringView){"false", 5})) { + return TOKEN_FALSE; + } + return TOKEN_SYMBOL; +} + +Token * +tokenize(const StringView *sv) { + Token *tokens = NULL; + array_init(tokens, 1); + Scanner scanner = (Scanner){ + .current = *sv, + .line_number = 1, + .col_number = 1, + }; + + while (scan_has_next(&scanner)) { + skip_whitespace(&scanner); + size_t line = scanner.line_number; + size_t col = scanner.col_number; + size_t offset = scanner.offset; + char c = scan_next(&scanner); + switch (c) { + case ';': { + while ((c = scan_next(&scanner)) != '\n' && c != '\0') {} + } break; + case '"': { + char prev = c; + bool found = false; + size_t n = 0; + while (scan_has_next(&scanner)) { + c = scan_next(&scanner); + if (c == '"' && prev != '\\') { + found = true; + break; + } + prev = c; + n++; + } + if (!found) { + error_push((Error){ + .type = ERR_TYPE_LEXER, + .value = ERR_UNMATCHED_STRING, + .line = line, + .col = col, + }); + return tokens; + } + Token token = (Token){ + .value = (StringView){ + .start = &sv->start[offset + 1], + .n = n, + }, + .type = TOKEN_STRING, + .line = line, + .column = col, + }; + array_push(tokens, token); + } break; + case '\'': { + Token token = (Token){ + .type = TOKEN_QUOTE, + .line = line, + .column = col, + }; + array_push(tokens, token); + } break; + case '(': { + if (scan_peek(&scanner) == ')') { + scan_next(&scanner); + Token token = (Token){ + .type = TOKEN_NIL, + .line = line, + .column = col, + }; + array_push(tokens, token); + } else { + Token token = (Token){ + .type = TOKEN_LPAREN, + .line = line, + .column = col, + }; + array_push(tokens, token); + } + } break; + case ')': { + Token token = (Token){ + .type = TOKEN_RPAREN, + .line = line, + .column = col, + }; + array_push(tokens, token); + } break; + default: { + size_t n = 1; + while (!is_delimiter(scan_peek(&scanner))) { + scan_next(&scanner); + n++; + } + if (c == EOF || c == '\0') { + break; + } + Token token = (Token){ + .value = (StringView){ + .start = &sv->start[offset], + .n = n, + }, + .type = TOKEN_SYMBOL, + .line = line, + .column = col, + }; + token.type = find_primitive_type(token.value); + array_push(tokens, token); + } break; + } + } + + // Push EOF token. + Token token = (Token){ + .type = TOKEN_EOF, + .line = scanner.line_number, + .column = 1, + }; + array_push(tokens, token); + + return tokens; +} diff --git a/src/bytecode/lexer.h b/src/bytecode/lexer.h new file mode 100644 index 0000000..e58dd05 --- /dev/null +++ b/src/bytecode/lexer.h @@ -0,0 +1,60 @@ +#ifndef BDL_LEXER_H +#define BDL_LEXER_H + +#include "string_view.h" + + +typedef enum TokenType { + TOKEN_UNKNOWN = 0, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_QUOTE, + TOKEN_TRUE, + TOKEN_FALSE, + TOKEN_NIL, + TOKEN_FIXNUM, + TOKEN_SYMBOL, + TOKEN_STRING, + TOKEN_EOF, +} TokenType; + +typedef struct Token { + TokenType type; + StringView value; + size_t line; + size_t column; +} Token; + +typedef struct Scanner { + StringView current; + size_t line_number; + size_t col_number; + size_t offset; +} Scanner; + +// Print a token to standard output for debugging purposes. +void print_token(Token tok); + +// Same functionality as the ScanView pairs, but keeping track of line and +// column numbers. +char scan_next(Scanner *scanner); +char scan_peek(const Scanner *scanner); + +// Check if the current scanner still have characters left. +bool scan_has_next(const Scanner *scanner); + +// Advance the scanner until we ran out of whitespace. +void skip_whitespace(Scanner *scanner); + +// Check if a given character is a delimiter. +bool is_delimiter(char c); + +// Extract the token type from the current string. +TokenType find_primitive_type(const StringView value); + +// Generate a list of tokens from the given string. +Token * tokenize(const StringView *sv); + +#define TOK_BUF_CAP 256 + +#endif // BDL_LEXER_H diff --git a/src/bytecode/main.c b/src/bytecode/main.c new file mode 100644 index 0000000..78fdfd3 --- /dev/null +++ b/src/bytecode/main.c @@ -0,0 +1,197 @@ +#include +#include +#include +#include +#include + +#include "types.h" +#include "darray.h" +#include "ops.h" +#include "debug.h" +#include "errors.c" +#include "lexer.c" +#include "read_line.c" +#include "string_view.c" + +void +init(void) { + // STUB +} + +void +process_source(const StringView *source) { + Token *tokens = tokenize(source); + if (errors_n != 0) { + array_free(tokens); + return; + } + + // Test chunks and debugging utilities. + u8 *chunk = NULL; + array_init(chunk, 0); + array_push(chunk, OP_RETURN); + array_push(chunk, OP_RETURN); + array_push(chunk, OP_RETURN); + array_push(chunk, OP_RETURN); + disassemble_chunk(chunk, "test chunk"); + + array_free(chunk); + array_free(tokens); +} + +#define REPL_PROMPT "bdl> " + +void +run_repl(void) { + printf("BDL REPL (Press Ctrl-D or Ctrl-C to exit)\n"); + while (true) { + printf(REPL_PROMPT); + StringView sv = read_line(); + if (sv.start == NULL) { + return; + } + process_source(&sv); + + // Check if there were any errors. + if (errors_n != 0 && !supress_errors) { + for (size_t i = 0; i < errors_n; i++) { + Error err = errors[i]; + for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { + putchar(' '); + } + printf("|\n"); + for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { + putchar(' '); + } + printf("%s\n", error_msgs[err.value]); + } + errors_n = 0; + continue; + } + } +} + +void +run_file(char *file_name) { + FILE *file = fopen(file_name, "r"); + if (!file) { + fprintf(stderr, "error: couldn't open input file: %s\n", file_name); + exit(EXIT_FAILURE); + } + + // Read entire file into memory. + fseek(file, 0, SEEK_END); + size_t file_size = ftell(file); + fseek(file, 0, SEEK_SET); + + char *source = malloc(file_size + 1); + fread(source, 1, file_size, file); + source[file_size] = 0; + + StringView sv = (StringView){ + .start = source, + .n = file_size, + }; + + process_source(&sv); + + // Check if there were any errors. + if (errors_n != 0 && !supress_errors) { + for (size_t i = 0; i < errors_n; i++) { + Error err = errors[i]; + fprintf(stderr, "%s", file_name); + if (err.line != 0) { + fprintf(stderr, ":%ld:%ld", err.line, err.col); + } + fprintf(stderr, ": %s\n", error_msgs[err.value]); + } + errors_n = 0; + } + + free(source); + fclose(file); +} + +#define STDIN_BUF_CAP 16 + +void +run_stdin(void) { + size_t buf_size = 0; + char *source = NULL; + array_init(source, STDIN_BUF_CAP); + + char c; + while ((c = getchar()) != EOF) { + array_push(source, c); + buf_size++; + } + + StringView sv = (StringView){ + .start = source, + .n = buf_size, + }; + + process_source(&sv); + + // Check if there were any errors. + if (errors_n != 0 && !supress_errors) { + for (size_t i = 0; i < errors_n; i++) { + Error err = errors[i]; + fprintf(stderr, "stdin"); + if (err.line != 0) { + fprintf(stderr, ":%ld:%ld", err.line, err.col); + } + fprintf(stderr, ": %s\n", error_msgs[err.value]); + } + errors_n = 0; + } + + array_free(source); +} + +#ifndef BIN_NAME +#define BIN_NAME "bdl" +#endif + +void +print_usage(void) { + printf("Usage: %s [options] \n", BIN_NAME); + printf("\n"); + printf("\t-i\tInteractive mode (REPL).\n"); + printf("\n"); +} + +int +main(int argc, char *argv[]) { + init(); + + int option; + while ((option = getopt(argc, argv, "i")) != -1) { + switch (option) { + case 'i': { + // Interactive mode. + run_repl(); + return EXIT_SUCCESS; + } break; + default: { + print_usage(); + return EXIT_FAILURE; + } break; + } + } + + // Run from stdin. + if (optind == argc) { + run_stdin(); + return EXIT_SUCCESS; + } + + // Run from file. + while (optind < argc) { + char *file_name = argv[optind]; + run_file(file_name); + optind++; + } + + return EXIT_SUCCESS; +} diff --git a/src/bytecode/ops.h b/src/bytecode/ops.h new file mode 100644 index 0000000..f7001ad --- /dev/null +++ b/src/bytecode/ops.h @@ -0,0 +1,8 @@ +#ifndef BDL_OPS_H +#define BDL_OPS_H + +typedef enum Ops { + OP_RETURN = 1, +} Ops; + +#endif // BDL_OPS_H diff --git a/src/bytecode/read_line.c b/src/bytecode/read_line.c new file mode 100644 index 0000000..03146ad --- /dev/null +++ b/src/bytecode/read_line.c @@ -0,0 +1,32 @@ +#include "read_line.h" + +static char readline_buf[RL_BUF_SIZE]; + +StringView +read_line(void) { + // Clear buffer. + for (size_t i = 0; i < RL_BUF_SIZE; i++) { + readline_buf[i] = 0; + } + + // Barebones readline implementation. + size_t n = 0; + char c; + while ((c = getchar()) != '\n') { + if (c == '\b') { + readline_buf[n] = '\0'; + n--; + } else if (c == EOF || c == '\0') { + return (StringView){ .start = NULL, .n = 0 }; + } else if ((c >= ' ' && c <= '~') && n < RL_BUF_SIZE) { + readline_buf[n] = c; + n++; + } + } + + StringView sv = (StringView){ + .start = (char *)&readline_buf, + .n = n, + }; + return sv; +} diff --git a/src/bytecode/read_line.h b/src/bytecode/read_line.h new file mode 100644 index 0000000..160bce0 --- /dev/null +++ b/src/bytecode/read_line.h @@ -0,0 +1,10 @@ +#ifndef BDL_READ_LINE_H +#define BDL_READ_LINE_H + +#include "string_view.h" + +StringView read_line(void); + +#define RL_BUF_SIZE 1024 + +#endif // BDL_READ_LINE_H diff --git a/src/bytecode/string_view.c b/src/bytecode/string_view.c new file mode 100644 index 0000000..39fabe9 --- /dev/null +++ b/src/bytecode/string_view.c @@ -0,0 +1,40 @@ +#include "string_view.h" + +char +sv_next(StringView *sv) { + if (sv->n == 0) { + return '\0'; + } + char c = sv->start[0]; + sv->start++; + sv->n--; + return c; +} + +char +sv_peek(const StringView *sv) { + if (sv->n == 0) { + return '\0'; + } + return sv->start[0]; +} + +bool +sv_equal(const StringView *a, const StringView *b) { + if (a->n != b->n) { + return false; + } + for (size_t i = 0; i < a->n; i++) { + if (a->start[i] != b->start[i]) { + return false; + } + } + return true; +} + +void +sv_write(const StringView *sv, FILE *file) { + for (size_t i = 0; i < sv->n; i++) { + putc(sv->start[i], file); + } +} diff --git a/src/bytecode/string_view.h b/src/bytecode/string_view.h new file mode 100644 index 0000000..42273ab --- /dev/null +++ b/src/bytecode/string_view.h @@ -0,0 +1,21 @@ +#ifndef BDL_STRINGVIEW_H +#define BDL_STRINGVIEW_H + +typedef struct StringView { + char *start; + size_t n; +} StringView; + +// Consume a character in the stream. +char sv_next(StringView *sv); + +// Check what is the current character in the stream. +char sv_peek(const StringView *sv); + +// Compare if the arguments are the same. +bool sv_equal(const StringView *a, const StringView *b); + +// Write a character to the given output stream. +void sv_write(const StringView *sv, FILE *file); + +#endif // BDL_STRINGVIEW_H diff --git a/src/bytecode/types.h b/src/bytecode/types.h new file mode 100644 index 0000000..dc21756 --- /dev/null +++ b/src/bytecode/types.h @@ -0,0 +1,30 @@ +#ifndef BDL_TYPES_H +#define BDL_TYPES_H + +#include +#include +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; +typedef volatile u8 vu8; +typedef volatile u16 vu16; +typedef volatile u32 vu32; +typedef volatile u64 vu64; +typedef volatile s8 vs8; +typedef volatile s16 vs16; +typedef volatile s32 vs32; +typedef volatile s64 vs64; + +#define KB(N) ((u64)(N) * 1024) +#define MB(N) ((u64)KB(N) * 1024) +#define GB(N) ((u64)MB(N) * 1024) +#define TB(N) ((u64)GB(N) * 1024) + +#endif // BDL_TYPES_H -- cgit v1.2.1