From eeff5e273f22aa28e81ab080e9ffdce85ac394b8 Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Fri, 22 Oct 2021 09:59:31 +0200 Subject: Prepare skeleton for bytecode interpreter --- src/treewalk/hashtable.h | 191 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 191 insertions(+) create mode 100644 src/treewalk/hashtable.h (limited to 'src/treewalk/hashtable.h') diff --git a/src/treewalk/hashtable.h b/src/treewalk/hashtable.h new file mode 100644 index 0000000..8f210e3 --- /dev/null +++ b/src/treewalk/hashtable.h @@ -0,0 +1,191 @@ +#ifndef BDL_HASHTABLE_H +#define BDL_HASHTABLE_H + +#include "darray.h" +#include "objects.h" + +// Minimum table capacity. +#define HT_MIN_CAP 4 +#define HT_MIN_SHIFT 2 + +// Adjust the load factor threshold at which the table will grow on insertion. +#define HT_LOAD_THRESHOLD 0.8 + +typedef struct HashTablePair { + Object *key; + Object *value; +} HashTablePair; + +typedef struct HashTable { + // All available key-value pairs as a dynamic array. + HashTablePair *pairs; + + // This table expects the number of buckets to grow in powers of two. To + // speedup the default hashing, we memoize the number of bits equivalent to + // that power of 2: + // + // cap := 1024 = 2 ^ 10, shift_amount := 10 + // + uint8_t shift_amount; +} HashTable; + +// Hash a byte stream using a circular shift + XOR hash function. +static inline uint64_t +_xor_shift_hash(const char *key, size_t n) { + uint64_t hash = 0x65d9d65f6a19574f; + char *last = (char *)key + n; + while (key != last) { + hash ^= (uint64_t)*key++; + hash = (hash << 8) | (hash >> (64 - 8)); + } + return hash; +} + +// Use Fibonacci hashing to map a hash to a value in range of the table. +static inline uint64_t +_fibonacci_hash(uint64_t hash, size_t shift_amount) { + return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +} + +uint64_t +ht_hash(const HashTable *table, const Object *key) { + uint64_t hash = 0; + switch (key->type) { + case OBJ_TYPE_FIXNUM: { + hash = key->fixnum; + } break; + case OBJ_TYPE_STRING: { + hash = _xor_shift_hash(key->string, array_size(key->string)); + } break; + case OBJ_TYPE_SYMBOL: { + hash = _xor_shift_hash(key->symbol, array_size(key->symbol)); + } break; + case OBJ_TYPE_BOOL: + case OBJ_TYPE_NIL: + case OBJ_TYPE_PAIR: + case OBJ_TYPE_LAMBDA: + case OBJ_TYPE_PROCEDURE: + case OBJ_TYPE_ERR: { + hash = (uintptr_t)key; + } break; + } + hash = _fibonacci_hash(hash, table->shift_amount); + return hash; +} + +static inline float +ht_load_factor(const HashTable *table) { + return (float)array_size(table->pairs) / (float)array_cap(table->pairs); +} + +HashTable * +ht_init(void) { + HashTable *table = malloc(sizeof(HashTable)); + table->pairs = NULL; + array_init(table->pairs, HT_MIN_CAP); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount = HT_MIN_SHIFT; + return table; +} + +void +_ht_insert(HashTable *table, const Object *key, const Object *value) { + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is free. If not, use linear probing to + // find the next free slot. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + array_head(pairs)->size++; + break; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + } + pairs[probe_position].key = (Object *)key; + pairs[probe_position].value = (Object *)value; +} + +void +_ht_maybe_grow(HashTable *table) { + HashTablePair *pairs = table->pairs; + if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { + return; + } + + // Create a new array with 2x capacity. + table->pairs = NULL; + array_init(table->pairs, array_cap(pairs) * 2); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount++; + + // Hash everything in the table for the new array capacity. + for (size_t i = 0; i < array_cap(pairs); i++) { + if (pairs[i].key != NULL) { + _ht_insert(table, pairs[i].key, pairs[i].value); + } + } + + // Free the old array. + array_free(pairs); +} + +void +ht_insert(HashTable *table, const Object *key, const Object *value) { + _ht_maybe_grow(table); + _ht_insert(table, key, value); + return; +} + +Object * +ht_lookup(const HashTable *table, const Object *key) { + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is the same. If not perform linear + // probing to find it. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + return NULL; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + if (probe_position == position) { + return NULL; + } + } + return pairs[probe_position].value; +} + +void +ht_free(HashTable *table) { + if (table == NULL) { + return; + } + if (table->pairs == NULL) { + return; + } + array_free(table->pairs); + free(table); +} + +#endif // BDL_HASHTABLE_H -- cgit v1.2.1