From 8a8b6595ebec509f1b5ae155a38a99347820225c Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Thu, 21 Oct 2021 14:28:37 +0200 Subject: Add WIP hashtable implementation --- src/bootstrap/hashtable.h | 158 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) create mode 100644 src/bootstrap/hashtable.h (limited to 'src/bootstrap/hashtable.h') diff --git a/src/bootstrap/hashtable.h b/src/bootstrap/hashtable.h new file mode 100644 index 0000000..baffec0 --- /dev/null +++ b/src/bootstrap/hashtable.h @@ -0,0 +1,158 @@ +#ifndef BDL_HASHTABLE_H +#define BDL_HASHTABLE_H + +#include "darray.h" +#include "objects.h" + +// Minimum table capacity. +#define HT_MIN_CAP 4 +#define HT_MIN_SHIFT 2 + +// Adjust the load factor threshold at which the table will grow on insertion. +#define HT_LOAD_THRESHOLD 0.75 + +typedef struct HashTablePair { + Object *key; + Object *value; +} HashTablePair; + +typedef struct HashTable { + // All available key-value pairs as a dynamic array. + HashTablePair *pairs; + + // This table expects the number of buckets to grow in powers of two. To + // speedup the default hashing, we memoize the number of bits equivalent to + // that power of 2: + // + // cap := 1024 = 2 ^ 10, shift_amount := 10 + // + uint8_t shift_amount; +} HashTable; + +// Use Fibonacci hashing to map a hash to a value in range of the table. +static inline uint64_t +_fibonacci_hash(uint64_t hash, size_t shift_amount) { + return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +} + +uint64_t +ht_hash(const HashTable *table, const Object *key) { + uint64_t hash = 0x65d9d65f6a19574f; + // printf("HASHING: "); + // display(key); + // printf("\n"); + return 0; +} + +static inline size_t +ht_load_factor(const HashTable *table) { + return array_size(table->pairs) / array_cap(table->pairs); +} + +HashTable * +ht_init(void) { + HashTable *table = malloc(sizeof(HashTable)); + table->pairs = NULL; + array_init(table->pairs, HT_MIN_CAP); + // Clear the table ensuring all references point to NULL. + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount = HT_MIN_SHIFT; + return table; +} + +void +ht_insert(HashTable *table, const Object *key, const Object *value) { + // TODO: Grow if needed. + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is free. If not, use linear probing to + // find the next free slot. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + break; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs)) { + probe_position = 0; + } else { + probe_position++; + } + } + pairs[probe_position].key = (Object *)key; + pairs[probe_position].value = (Object *)value; + return; +} + +Object * +ht_lookup(const HashTable *table, const Object *key) { + size_t position = ht_hash(table, key); + size_t probe_position = position; + + // Verify the key in that position is the same. If not perform linear + // probing to find it. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + return NULL; + } + if (obj_eq(pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs)) { + probe_position = 0; + } else { + probe_position++; + } + } + return pairs[probe_position].value; +} + +void +ht_debug(HashTable *table) { + HashTablePair *pairs = table->pairs; + for (size_t i = 0; i < array_cap(pairs); i++) { + printf("i: %ld ", i); + if (pairs[i].key == NULL) { + printf("EMPTY\n"); + } else { + printf("key: "); + display(pairs[i].key); + printf(" value: "); + display(pairs[i].value); + printf("\n"); + } + } +} + +// // Use Fibonacci hashing to map a hash to a value in range of the table. +// static inline +// uint64_t _fibonacci_hash(uint64_t hash, size_t shift_amount) { +// return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +// } + +// // Hash a null terminated string using a circular shift + XOR hash function. +// static inline +// uint64_t _string_hash(const HashStash *table, const char *key) { +// uint64_t hash = 0x65d9d65f6a19574f; +// while (*key) { +// hash ^= (uint64_t)*key++; +// hash = (hash << 8) | (hash >> (64 - 8)); +// } +// return _fibonacci_hash(hash, table->shift_amount); +// } + +// // Return the key as an unsigned integer. +// static inline +// uint64_t _number_hash(const HashStash *table, const char *key) { +// uint64_t hash = 0; +// memcpy(&hash, key, table->key_length); +// return _fibonacci_hash(hash, table->shift_amount); +// } + +#endif // BDL_HASHTABLE_H -- cgit v1.2.1