From 8a8b6595ebec509f1b5ae155a38a99347820225c Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Thu, 21 Oct 2021 14:28:37 +0200 Subject: Add WIP hashtable implementation --- src/bootstrap/gc.c | 2 +- src/bootstrap/gc.h | 2 +- src/bootstrap/hashtable.h | 158 +++++++++++++ src/bootstrap/main.c | 558 +++++++++++++++++++++++++--------------------- src/bootstrap/objects.c | 2 +- src/bootstrap/objects.h | 2 +- 6 files changed, 461 insertions(+), 263 deletions(-) create mode 100644 src/bootstrap/hashtable.h (limited to 'src') diff --git a/src/bootstrap/gc.c b/src/bootstrap/gc.c index 259cccd..a188279 100644 --- a/src/bootstrap/gc.c +++ b/src/bootstrap/gc.c @@ -37,7 +37,7 @@ pop_active_env(void) { } void -init_gc(void) { +gc_init(void) { gc = (GC){0}; array_init(gc.objects, GC_OBJS_CAP); diff --git a/src/bootstrap/gc.h b/src/bootstrap/gc.h index 0a0bbd0..9ad1615 100644 --- a/src/bootstrap/gc.h +++ b/src/bootstrap/gc.h @@ -18,7 +18,7 @@ typedef struct GC { Environment **active_envs; } GC; -void init_gc(void); +void gc_init(void); // Allocation functions for objects and environments. Object * alloc_object(ObjectType type); diff --git a/src/bootstrap/hashtable.h b/src/bootstrap/hashtable.h new file mode 100644 index 0000000..baffec0 --- /dev/null +++ b/src/bootstrap/hashtable.h @@ -0,0 +1,158 @@ +#ifndef BDL_HASHTABLE_H +#define BDL_HASHTABLE_H + +#include "darray.h" +#include "objects.h" + +// Minimum table capacity. +#define HT_MIN_CAP 4 +#define HT_MIN_SHIFT 2 + +// Adjust the load factor threshold at which the table will grow on insertion. +#define HT_LOAD_THRESHOLD 0.75 + +typedef struct HashTablePair { + Object *key; + Object *value; +} HashTablePair; + +typedef struct HashTable { + // All available key-value pairs as a dynamic array. + HashTablePair *pairs; + + // This table expects the number of buckets to grow in powers of two. To + // speedup the default hashing, we memoize the number of bits equivalent to + // that power of 2: + // + // cap := 1024 = 2 ^ 10, shift_amount := 10 + // + uint8_t shift_amount; +} HashTable; + +// Use Fibonacci hashing to map a hash to a value in range of the table. +static inline uint64_t +_fibonacci_hash(uint64_t hash, size_t shift_amount) { + return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +} + +uint64_t +ht_hash(const HashTable *table, const Object *key) { + uint64_t hash = 0x65d9d65f6a19574f; + // printf("HASHING: "); + // display(key); + // printf("\n"); + return 0; +} + +static inline size_t +ht_load_factor(const HashTable *table) { + return array_size(table->pairs) / array_cap(table->pairs); +} + +HashTable * +ht_init(void) { + HashTable *table = malloc(sizeof(HashTable)); + table->pairs = NULL; + array_init(table->pairs, HT_MIN_CAP); + // Clear the table ensuring all references point to NULL. + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount = HT_MIN_SHIFT; + return table; +} + +void +ht_insert(HashTable *table, const Object *key, const Object *value) { + // TODO: Grow if needed. + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is free. If not, use linear probing to + // find the next free slot. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + break; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs)) { + probe_position = 0; + } else { + probe_position++; + } + } + pairs[probe_position].key = (Object *)key; + pairs[probe_position].value = (Object *)value; + return; +} + +Object * +ht_lookup(const HashTable *table, const Object *key) { + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is the same. If not perform linear + // probing to find it. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + return NULL; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs)) { + probe_position = 0; + } else { + probe_position++; + } + } + return pairs[probe_position].value; +} + +void +ht_debug(HashTable *table) { + HashTablePair *pairs = table->pairs; + for (size_t i = 0; i < array_cap(pairs); i++) { + printf("i: %ld ", i); + if (pairs[i].key == NULL) { + printf("EMPTY\n"); + } else { + printf("key: "); + display(pairs[i].key); + printf(" value: "); + display(pairs[i].value); + printf("\n"); + } + } +} + +// // Use Fibonacci hashing to map a hash to a value in range of the table. +// static inline +// uint64_t _fibonacci_hash(uint64_t hash, size_t shift_amount) { +// return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +// } + +// // Hash a null terminated string using a circular shift + XOR hash function. +// static inline +// uint64_t _string_hash(const HashStash *table, const char *key) { +// uint64_t hash = 0x65d9d65f6a19574f; +// while (*key) { +// hash ^= (uint64_t)*key++; +// hash = (hash << 8) | (hash >> (64 - 8)); +// } +// return _fibonacci_hash(hash, table->shift_amount); +// } + +// // Return the key as an unsigned integer. +// static inline +// uint64_t _number_hash(const HashStash *table, const char *key) { +// uint64_t hash = 0; +// memcpy(&hash, key, table->key_length); +// return _fibonacci_hash(hash, table->shift_amount); +// } + +#endif // BDL_HASHTABLE_H diff --git a/src/bootstrap/main.c b/src/bootstrap/main.c index 73a9244..1cb7c82 100755 --- a/src/bootstrap/main.c +++ b/src/bootstrap/main.c @@ -7,6 +7,7 @@ #include #include "darray.h" +#include "hashtable.h" #include "singletons.c" @@ -20,268 +21,307 @@ #include "read_line.c" #include "string_view.c" -void -init(void) { - // Initialize garbage collector. - init_gc(); - - // Initialize singletons. - obj_nil = alloc_object(OBJ_TYPE_NIL); - obj_true = alloc_object(OBJ_TYPE_BOOL); - obj_false = alloc_object(OBJ_TYPE_BOOL); - obj_err = alloc_object(OBJ_TYPE_ERR); - obj_quote = make_symbol((StringView){"quote", 5}); - proc_if = alloc_object(OBJ_TYPE_ERR); - push_root(obj_nil); - push_root(obj_true); - push_root(obj_false); - push_root(obj_err); - push_root(obj_quote); - push_root(proc_if); - - // Global environment. - global_env = env_create(NULL); - // TODO: make sure we create symbols and strings only once (interning - // strings?) - push_active_env(global_env); - - // Primitive symbols. - MAKE_ENV_VAR(global_env, "else", obj_true); - MAKE_ENV_VAR(global_env, "nil", obj_nil); - MAKE_ENV_VAR(global_env, "if", proc_if); - - // Primitive procedures. - MAKE_ENV_PROC(global_env, "eval", proc_eval); - MAKE_ENV_PROC(global_env, "quote", proc_quote); - MAKE_ENV_PROC(global_env, "car", proc_car); - MAKE_ENV_PROC(global_env, "cdr", proc_cdr); - MAKE_ENV_PROC(global_env, "cons", proc_cons); - MAKE_ENV_PROC(global_env, "list", proc_list); - MAKE_ENV_PROC(global_env, "+", proc_sum); - MAKE_ENV_PROC(global_env, "-", proc_sub); - MAKE_ENV_PROC(global_env, "*", proc_mul); - MAKE_ENV_PROC(global_env, "/", proc_div); - MAKE_ENV_PROC(global_env, "%", proc_mod); - MAKE_ENV_PROC(global_env, "print", proc_print); - MAKE_ENV_PROC(global_env, "display", proc_display); - MAKE_ENV_PROC(global_env, "newline", proc_newline); - MAKE_ENV_PROC(global_env, "boolean?", proc_is_boolean); - MAKE_ENV_PROC(global_env, "nil?", proc_is_nil); - MAKE_ENV_PROC(global_env, "symbol?", proc_is_symbol); - MAKE_ENV_PROC(global_env, "string?", proc_is_string); - MAKE_ENV_PROC(global_env, "fixnum?", proc_is_fixnum); - MAKE_ENV_PROC(global_env, "pair?", proc_is_pair); - MAKE_ENV_PROC(global_env, "procedure?", proc_is_procedure); - MAKE_ENV_PROC(global_env, "error?", proc_is_error); - MAKE_ENV_PROC(global_env, "not", proc_not); - MAKE_ENV_PROC(global_env, "and", proc_and); - MAKE_ENV_PROC(global_env, "or", proc_or); - MAKE_ENV_PROC(global_env, "cond", proc_cond); - MAKE_ENV_PROC(global_env, "<", proc_num_less_than); - MAKE_ENV_PROC(global_env, "<=", proc_num_lesseq_than); - MAKE_ENV_PROC(global_env, ">", proc_num_greater_than); - MAKE_ENV_PROC(global_env, ">=", proc_num_greatereq_than); - MAKE_ENV_PROC(global_env, "=", proc_num_equal); - MAKE_ENV_PROC(global_env, "eq?", proc_equal); - MAKE_ENV_PROC(global_env, "def", proc_define); - MAKE_ENV_PROC(global_env, "set!", proc_set); - MAKE_ENV_PROC(global_env, "lambda", proc_lambda); - MAKE_ENV_PROC(global_env, "fun", proc_fun); - - // Runtime procedures. - MAKE_ENV_PROC(global_env, "supress-errors", proc_supress_errors); -} - -void -process_source(const StringView *source) { - Token *tokens = tokenize(source); - if (errors_n != 0) { - if (tokens != NULL) { - array_free(tokens); - } - return; - } - - Visitor visitor = (Visitor){ - .tokens = tokens, - .current = 0, - }; - while (has_next_token(&visitor) && peek_token(&visitor).type != TOKEN_EOF) { - // Check the root node stack size before parsing - size_t root_stack_size = array_size(gc.roots); - Object *root = parse_tree(&visitor); - array_head(gc.roots)->size = root_stack_size; - if (root == obj_err || errors_n != 0) { - break; - } - push_root(root); - - Object *result = eval(global_env, root); - if (result != obj_nil) { - display(result); - printf("\n"); - } - pop_root(); +int main(void) { + // Initialize GC. + gc_init(); + + // Initialize key-value objects. + Object *k1 = MAKE_SYM("Alice"); + Object *k2 = MAKE_SYM("Bob"); + Object *k3 = MAKE_SYM("Dog"); + Object *v1 = make_fixnum(10); + Object *v2 = make_fixnum(49); + Object *v3 = make_fixnum(333); + + // Initialize hash table. + HashTable *table = ht_init(); + + // Add some key-value pairs. + ht_insert(table, k1, v1); + ht_insert(table, k2, v2); + ht_insert(table, k3, v3); + + // Test lookups. + Object *alice_val = ht_lookup(table, k1); + Object *bob_val = ht_lookup(table, MAKE_SYM("Bob")); + Object *dog_val = ht_lookup(table, k3); + + if (v1 == alice_val) { + printf("Alice match!\n"); } - - if (tokens != NULL) { - array_free(tokens); - } -} - -#define REPL_PROMPT "bdl> " - -void -run_repl(void) { - printf("BDL REPL (Press Ctrl-D or Ctrl-C to exit)\n"); - while (true) { - printf(REPL_PROMPT); - StringView sv = read_line(); - if (sv.start == NULL) { - return; - } - process_source(&sv); - - // Check if there were any errors. - if (errors_n != 0 && !supress_errors) { - for (size_t i = 0; i < errors_n; i++) { - Error err = errors[i]; - for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { - putchar(' '); - } - printf("|\n"); - for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { - putchar(' '); - } - printf("%s\n", error_msgs[err.value]); - } - errors_n = 0; - continue; - } - } -} - -void -run_file(char *file_name) { - FILE *file = fopen(file_name, "r"); - if (!file) { - fprintf(stderr, "error: couldn't open input file: %s\n", file_name); - exit(EXIT_FAILURE); - } - - // Read entire file into memory. - fseek(file, 0, SEEK_END); - size_t file_size = ftell(file); - fseek(file, 0, SEEK_SET); - - char *source = malloc(file_size + 1); - fread(source, 1, file_size, file); - source[file_size] = 0; - - StringView sv = (StringView){ - .start = source, - .n = file_size, - }; - - process_source(&sv); - - // Check if there were any errors. - if (errors_n != 0 && !supress_errors) { - for (size_t i = 0; i < errors_n; i++) { - Error err = errors[i]; - fprintf(stderr, "%s", file_name); - if (err.line != 0) { - fprintf(stderr, ":%ld:%ld", err.line, err.col); - } - fprintf(stderr, ": %s\n", error_msgs[err.value]); - } - errors_n = 0; - } - - free(source); - fclose(file); -} - -#define STDIN_BUF_CAP 16 - -void -run_stdin(void) { - size_t buf_size = 0; - char *source = NULL; - array_init(source, STDIN_BUF_CAP); - - char c; - while ((c = getchar()) != EOF) { - array_push(source, c); - buf_size++; + if (v2 == bob_val) { + printf("Bob match!\n"); } - - StringView sv = (StringView){ - .start = source, - .n = buf_size, - }; - - process_source(&sv); - - // Check if there were any errors. - if (errors_n != 0 && !supress_errors) { - for (size_t i = 0; i < errors_n; i++) { - Error err = errors[i]; - fprintf(stderr, "stdin"); - if (err.line != 0) { - fprintf(stderr, ":%ld:%ld", err.line, err.col); - } - fprintf(stderr, ": %s\n", error_msgs[err.value]); - } - errors_n = 0; + if (v3 == dog_val) { + printf("Dog match!\n"); } - array_free(source); -} - -#ifndef BIN_NAME -#define BIN_NAME "bdl" -#endif - -void -print_usage(void) { - printf("Usage: %s [options] \n", BIN_NAME); - printf("\n"); - printf("\t-i\tInteractive mode (REPL).\n"); - printf("\n"); + ht_debug(table); + return 0; } -int -main(int argc, char *argv[]) { - init(); - - int option; - while ((option = getopt(argc, argv, "i")) != -1) { - switch (option) { - case 'i': { - // Interactive mode. - run_repl(); - return EXIT_SUCCESS; - } break; - default: { - print_usage(); - return EXIT_FAILURE; - } break; - } - } - - // Run from stdin. - if (optind == argc) { - run_stdin(); - return EXIT_SUCCESS; - } - - // Run from file. - while (optind < argc) { - char *file_name = argv[optind]; - run_file(file_name); - optind++; - } - - return EXIT_SUCCESS; -} +// void +// init(void) { +// // Initialize garbage collector. +// gc_init(); + +// // Initialize singletons. +// obj_nil = alloc_object(OBJ_TYPE_NIL); +// obj_true = alloc_object(OBJ_TYPE_BOOL); +// obj_false = alloc_object(OBJ_TYPE_BOOL); +// obj_err = alloc_object(OBJ_TYPE_ERR); +// obj_quote = make_symbol((StringView){"quote", 5}); +// proc_if = alloc_object(OBJ_TYPE_ERR); +// push_root(obj_nil); +// push_root(obj_true); +// push_root(obj_false); +// push_root(obj_err); +// push_root(obj_quote); +// push_root(proc_if); + +// // Global environment. +// global_env = env_create(NULL); +// // TODO: make sure we create symbols and strings only once (interning +// // strings?) +// push_active_env(global_env); + +// // Primitive symbols. +// MAKE_ENV_VAR(global_env, "else", obj_true); +// MAKE_ENV_VAR(global_env, "nil", obj_nil); +// MAKE_ENV_VAR(global_env, "if", proc_if); + +// // Primitive procedures. +// MAKE_ENV_PROC(global_env, "eval", proc_eval); +// MAKE_ENV_PROC(global_env, "quote", proc_quote); +// MAKE_ENV_PROC(global_env, "car", proc_car); +// MAKE_ENV_PROC(global_env, "cdr", proc_cdr); +// MAKE_ENV_PROC(global_env, "cons", proc_cons); +// MAKE_ENV_PROC(global_env, "list", proc_list); +// MAKE_ENV_PROC(global_env, "+", proc_sum); +// MAKE_ENV_PROC(global_env, "-", proc_sub); +// MAKE_ENV_PROC(global_env, "*", proc_mul); +// MAKE_ENV_PROC(global_env, "/", proc_div); +// MAKE_ENV_PROC(global_env, "%", proc_mod); +// MAKE_ENV_PROC(global_env, "print", proc_print); +// MAKE_ENV_PROC(global_env, "display", proc_display); +// MAKE_ENV_PROC(global_env, "newline", proc_newline); +// MAKE_ENV_PROC(global_env, "boolean?", proc_is_boolean); +// MAKE_ENV_PROC(global_env, "nil?", proc_is_nil); +// MAKE_ENV_PROC(global_env, "symbol?", proc_is_symbol); +// MAKE_ENV_PROC(global_env, "string?", proc_is_string); +// MAKE_ENV_PROC(global_env, "fixnum?", proc_is_fixnum); +// MAKE_ENV_PROC(global_env, "pair?", proc_is_pair); +// MAKE_ENV_PROC(global_env, "procedure?", proc_is_procedure); +// MAKE_ENV_PROC(global_env, "error?", proc_is_error); +// MAKE_ENV_PROC(global_env, "not", proc_not); +// MAKE_ENV_PROC(global_env, "and", proc_and); +// MAKE_ENV_PROC(global_env, "or", proc_or); +// MAKE_ENV_PROC(global_env, "cond", proc_cond); +// MAKE_ENV_PROC(global_env, "<", proc_num_less_than); +// MAKE_ENV_PROC(global_env, "<=", proc_num_lesseq_than); +// MAKE_ENV_PROC(global_env, ">", proc_num_greater_than); +// MAKE_ENV_PROC(global_env, ">=", proc_num_greatereq_than); +// MAKE_ENV_PROC(global_env, "=", proc_num_equal); +// MAKE_ENV_PROC(global_env, "eq?", proc_equal); +// MAKE_ENV_PROC(global_env, "def", proc_define); +// MAKE_ENV_PROC(global_env, "set!", proc_set); +// MAKE_ENV_PROC(global_env, "lambda", proc_lambda); +// MAKE_ENV_PROC(global_env, "fun", proc_fun); + +// // Runtime procedures. +// MAKE_ENV_PROC(global_env, "supress-errors", proc_supress_errors); +// } + +// void +// process_source(const StringView *source) { +// Token *tokens = tokenize(source); +// if (errors_n != 0) { +// if (tokens != NULL) { +// array_free(tokens); +// } +// return; +// } + +// Visitor visitor = (Visitor){ +// .tokens = tokens, +// .current = 0, +// }; +// while (has_next_token(&visitor) && peek_token(&visitor).type != TOKEN_EOF) { +// // Check the root node stack size before parsing +// size_t root_stack_size = array_size(gc.roots); +// Object *root = parse_tree(&visitor); +// array_head(gc.roots)->size = root_stack_size; +// if (root == obj_err || errors_n != 0) { +// break; +// } +// push_root(root); + +// Object *result = eval(global_env, root); +// if (result != obj_nil) { +// display(result); +// printf("\n"); +// } +// pop_root(); +// } + +// if (tokens != NULL) { +// array_free(tokens); +// } +// } + +// #define REPL_PROMPT "bdl> " + +// void +// run_repl(void) { +// printf("BDL REPL (Press Ctrl-D or Ctrl-C to exit)\n"); +// while (true) { +// printf(REPL_PROMPT); +// StringView sv = read_line(); +// if (sv.start == NULL) { +// return; +// } +// process_source(&sv); + +// // Check if there were any errors. +// if (errors_n != 0 && !supress_errors) { +// for (size_t i = 0; i < errors_n; i++) { +// Error err = errors[i]; +// for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { +// putchar(' '); +// } +// printf("|\n"); +// for (size_t j = 0; j < err.col + sizeof(REPL_PROMPT) - 2; j++) { +// putchar(' '); +// } +// printf("%s\n", error_msgs[err.value]); +// } +// errors_n = 0; +// continue; +// } +// } +// } + +// void +// run_file(char *file_name) { +// FILE *file = fopen(file_name, "r"); +// if (!file) { +// fprintf(stderr, "error: couldn't open input file: %s\n", file_name); +// exit(EXIT_FAILURE); +// } + +// // Read entire file into memory. +// fseek(file, 0, SEEK_END); +// size_t file_size = ftell(file); +// fseek(file, 0, SEEK_SET); + +// char *source = malloc(file_size + 1); +// fread(source, 1, file_size, file); +// source[file_size] = 0; + +// StringView sv = (StringView){ +// .start = source, +// .n = file_size, +// }; + +// process_source(&sv); + +// // Check if there were any errors. +// if (errors_n != 0 && !supress_errors) { +// for (size_t i = 0; i < errors_n; i++) { +// Error err = errors[i]; +// fprintf(stderr, "%s", file_name); +// if (err.line != 0) { +// fprintf(stderr, ":%ld:%ld", err.line, err.col); +// } +// fprintf(stderr, ": %s\n", error_msgs[err.value]); +// } +// errors_n = 0; +// } + +// free(source); +// fclose(file); +// } + +// #define STDIN_BUF_CAP 16 + +// void +// run_stdin(void) { +// size_t buf_size = 0; +// char *source = NULL; +// array_init(source, STDIN_BUF_CAP); + +// char c; +// while ((c = getchar()) != EOF) { +// array_push(source, c); +// buf_size++; +// } + +// StringView sv = (StringView){ +// .start = source, +// .n = buf_size, +// }; + +// process_source(&sv); + +// // Check if there were any errors. +// if (errors_n != 0 && !supress_errors) { +// for (size_t i = 0; i < errors_n; i++) { +// Error err = errors[i]; +// fprintf(stderr, "stdin"); +// if (err.line != 0) { +// fprintf(stderr, ":%ld:%ld", err.line, err.col); +// } +// fprintf(stderr, ": %s\n", error_msgs[err.value]); +// } +// errors_n = 0; +// } + +// array_free(source); +// } + +// #ifndef BIN_NAME +// #define BIN_NAME "bdl" +// #endif + +// void +// print_usage(void) { +// printf("Usage: %s [options] \n", BIN_NAME); +// printf("\n"); +// printf("\t-i\tInteractive mode (REPL).\n"); +// printf("\n"); +// } + +// int +// main(int argc, char *argv[]) { +// init(); + +// int option; +// while ((option = getopt(argc, argv, "i")) != -1) { +// switch (option) { +// case 'i': { +// // Interactive mode. +// run_repl(); +// return EXIT_SUCCESS; +// } break; +// default: { +// print_usage(); +// return EXIT_FAILURE; +// } break; +// } +// } + +// // Run from stdin. +// if (optind == argc) { +// run_stdin(); +// return EXIT_SUCCESS; +// } + +// // Run from file. +// while (optind < argc) { +// char *file_name = argv[optind]; +// run_file(file_name); +// optind++; +// } + +// return EXIT_SUCCESS; +// } diff --git a/src/bootstrap/objects.c b/src/bootstrap/objects.c index 3ca2e43..c71bc40 100644 --- a/src/bootstrap/objects.c +++ b/src/bootstrap/objects.c @@ -105,7 +105,7 @@ display(Object *root) { } bool -obj_eq(Object *a, Object* b) { +obj_eq(const Object *a, const Object* b) { if (a->type != b->type) { return false; } diff --git a/src/bootstrap/objects.h b/src/bootstrap/objects.h index 5968a44..ed623eb 100644 --- a/src/bootstrap/objects.h +++ b/src/bootstrap/objects.h @@ -65,7 +65,7 @@ void display(Object *root); void display_pair(Object *root); // Object comparison. -bool obj_eq(Object *a, Object* b); +bool obj_eq(const Object *a, const Object* b); // Utility macros. #define DEBUG_OBJ(MSG,OBJ) printf((MSG)); display(OBJ); printf("\n"); -- cgit v1.2.1