From af3f2ebc42bf9dc64fc581b9f8f79e98895cb417 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sat, 30 Oct 2021 10:13:11 +0200 Subject: Add hashtable for Environment tracking --- src/hashtable.h | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.c | 53 +++++++++++++++++ src/parser.h | 7 +++ 3 files changed, 240 insertions(+) create mode 100644 src/hashtable.h (limited to 'src') diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..faa8591 --- /dev/null +++ b/src/hashtable.h @@ -0,0 +1,180 @@ +#ifndef BDL_HASHTABLE_H +#define BDL_HASHTABLE_H + +#include "darray.h" + +// Minimum table capacity. +#define HT_MIN_CAP 4 +#define HT_MIN_SHIFT 2 + +// Adjust the load factor threshold at which the table will grow on insertion. +#define HT_LOAD_THRESHOLD 0.8 + +typedef struct HashTablePair { + void *key; + void *value; +} HashTablePair; + +struct HashTable; +typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); +typedef bool (EqFunc)(void *a, void *b); + +typedef struct HashTable { + // All available key-value pairs as a dynamic array. + HashTablePair *pairs; + + // Hash function. + HashFunction *hash_func; + + // Equality function. + EqFunc *eq_func; + + // This table expects the number of buckets to grow in powers of two. To + // speedup the default hashing, we memoize the number of bits equivalent to + // that power of 2: + // + // cap := 1024 = 2 ^ 10, shift_amount := 10 + // + uint8_t shift_amount; +} HashTable; + +// Hash a byte stream using a circular shift + XOR hash function. +static inline uint64_t +_xor_shift_hash(const char *key, size_t n) { + uint64_t hash = 0x65d9d65f6a19574f; + char *last = (char *)key + n; + while (key != last) { + hash ^= (uint64_t)*key++; + hash = (hash << 8) | (hash >> (64 - 8)); + } + return hash; +} + +// Use Fibonacci hashing to map a hash to a value in range of the table. +static inline uint64_t +_fibonacci_hash(uint64_t hash, size_t shift_amount) { + return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +} + +static inline float +ht_load_factor(const HashTable *table) { + return (float)array_size(table->pairs) / (float)array_cap(table->pairs); +} + +HashTable * +ht_init(HashFunction *hash_func, EqFunc *eq_func) { + HashTable *table = malloc(sizeof(HashTable)); + *table = (HashTable){0}; + array_init(table->pairs, HT_MIN_CAP); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount = HT_MIN_SHIFT; + table->hash_func = hash_func; + table->eq_func = eq_func; + return table; +} + +void +_ht_insert(HashTable *table, void *key, void *value) { + size_t position = table->hash_func(table, key); + size_t probe_position = position; + + // Verify the key in that position is free. If not, use linear probing to + // find the next free slot. + HashTablePair *pairs = table->pairs; + bool update = false; + while (true) { + if (pairs[probe_position].key == NULL) { + array_head(pairs)->size++; + break; + } + if (table->eq_func(pairs[probe_position].key, key)) { + update = true; + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + } + + if (!update) { + pairs[probe_position].key = key; + pairs[probe_position].value = value; + } else { + pairs[probe_position].value = value; + } +} + +void +_ht_maybe_grow(HashTable *table) { + if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { + return; + } + + // Create a new array with 2x capacity. + HashTablePair *old_pairs = table->pairs; + table->pairs = NULL; + array_init(table->pairs, array_cap(old_pairs) * 2); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount++; + + // Hash everything in the table for the new array capacity. + for (size_t i = 0; i < array_cap(old_pairs); i++) { + if (old_pairs[i].key != NULL) { + _ht_insert(table, old_pairs[i].key, old_pairs[i].value); + } + } + + // Free old arrays. + array_free(old_pairs); +} + +void +ht_insert(HashTable *table, void *key, void *value) { + _ht_maybe_grow(table); + _ht_insert(table, key, value); + return; +} + +void * +ht_lookup(const HashTable *table, void *key) { + size_t position = table->hash_func(table, key); + size_t probe_position = position; + + // Verify the key in that position is the same. If not perform linear + // probing to find it. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + return NULL; + } + if (table->eq_func(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + if (probe_position == position) { + return NULL; + } + } + return pairs[probe_position].value; +} + +void +ht_free(HashTable *table) { + if (table == NULL) { + return; + } + array_free(table->pairs); + free(table); +} + +#endif // BDL_HASHTABLE_H diff --git a/src/parser.c b/src/parser.c index 2215fad..e16fc4a 100644 --- a/src/parser.c +++ b/src/parser.c @@ -4,6 +4,15 @@ static Object **objects = NULL; static Root *roots = NULL; + +uint64_t +symbol_hash(const HashTable *table, void *key) { + Object *obj = key; + uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text)); + hash = _fibonacci_hash(hash, table->shift_amount); + return hash; +} + Token peek_token(const Parser *parser) { return parser->tokens[parser->current]; @@ -391,6 +400,16 @@ parse(Token *tokens, Errors *errors) { array_push(roots, root); } + // TEST INSERTION/LOOKUP. + HashTable *table = ht_init(symbol_hash, object_equal); + ht_insert(table, &roots[0][0], &roots[1][0]); + ht_insert(table, &roots[2][0], &roots[3][0]); + Object *val = ht_lookup(table, &roots[2][0]); + object_display(val); + val = ht_lookup(table, &roots[0][0]); + object_display(val); + ht_free(table); + // Perform semantic analysis. // TODO: Check that symbols are defined before usage. // TODO: Remove unnecessary statements. @@ -522,3 +541,37 @@ object_display(Object *obj) { } return; } + +bool +object_equal(Object *a, Object *b) { + if (a == NULL || b == NULL || a->type != b->type) { + return false; + } + switch (a->type) { + case OBJ_TYPE_TRUE: + case OBJ_TYPE_FALSE: { + return true; + } break; + case OBJ_TYPE_FIXNUM: { + return a->fixnum == b->fixnum; + } break; + case OBJ_TYPE_SYMBOL: + case OBJ_TYPE_STRING: { + if (array_size(a->text) != array_size(b->text)) { + return false; + } + for (size_t i = 0; i < array_size(a->text); i++) { + if (a->text[i] != b->text[i]) { + return false; + } + } + } break; + case OBJ_TYPE_LAMBDA: { + // return a->closure == b.closure; + } break; + default: { + return false; + } break; + } + return true; +} diff --git a/src/parser.h b/src/parser.h index 14d9df5..d1eddc7 100755 --- a/src/parser.h +++ b/src/parser.h @@ -2,6 +2,7 @@ #define BDL_PARSER_H #include "lexer.h" +#include "hashtable.h" typedef enum ObjectType { OBJ_TYPE_NIL, @@ -58,6 +59,11 @@ typedef struct Object { size_t col; } Object; +typedef struct Environment { + HashTable *table; + struct Environment *parent; +}; + typedef struct Parser { Token *tokens; size_t current; @@ -86,6 +92,7 @@ Root * parse(Token *tokens, Errors *errors); // Object operations. void object_display(Object *obj); +bool object_equal(Object *a, Object *b); // Manage resources. Object * object_alloc(Token tok, ObjectType type); -- cgit v1.2.1