#ifndef BDL_HASHTABLE_H #define BDL_HASHTABLE_H #include "darray.h" // Minimum table capacity. #define HT_MIN_CAP 4 #define HT_MIN_SHIFT 2 // Adjust the load factor threshold at which the table will grow on insertion. #define HT_LOAD_THRESHOLD 0.8 typedef struct HashTablePair { void *key; void *value; } HashTablePair; struct HashTable; typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); typedef bool (EqFunc)(void *a, void *b); typedef struct HashTable { // All available key-value pairs as a dynamic array. HashTablePair *pairs; // Hash function. HashFunction *hash_func; // Equality function. EqFunc *eq_func; // This table expects the number of buckets to grow in powers of two. To // speedup the default hashing, we memoize the number of bits equivalent to // that power of 2: // // cap := 1024 = 2 ^ 10, shift_amount := 10 // uint8_t shift_amount; } HashTable; // Hash a byte stream using a circular shift + XOR hash function. static inline uint64_t _xor_shift_hash(const char *key, size_t n) { uint64_t hash = 0x65d9d65f6a19574f; char *last = (char *)key + n; while (key != last) { hash ^= (uint64_t)*key++; hash = (hash << 8) | (hash >> (64 - 8)); } return hash; } // Use Fibonacci hashing to map a hash to a value in range of the table. static inline uint64_t _fibonacci_hash(uint64_t hash, size_t shift_amount) { return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); } static inline float ht_load_factor(const HashTable *table) { return (float)array_size(table->pairs) / (float)array_cap(table->pairs); } HashTable * ht_init(HashFunction *hash_func, EqFunc *eq_func) { HashTable *table = malloc(sizeof(HashTable)); *table = (HashTable){0}; array_init(table->pairs, HT_MIN_CAP); for (size_t i = 0; i < array_cap(table->pairs); i++) { table->pairs[i] = (HashTablePair){NULL, NULL}; } table->shift_amount = HT_MIN_SHIFT; table->hash_func = hash_func; table->eq_func = eq_func; return table; } void _ht_insert(HashTable *table, void *key, void *value) { size_t position = table->hash_func(table, key); size_t probe_position = position; // Verify the key in that position is free. If not, use linear probing to // find the next free slot. HashTablePair *pairs = table->pairs; bool update = false; while (true) { if (pairs[probe_position].key == NULL) { array_head(pairs)->size++; break; } if (table->eq_func(pairs[probe_position].key, key)) { update = true; break; } if (probe_position == array_cap(pairs) - 1) { probe_position = 0; } else { probe_position++; } } if (!update) { pairs[probe_position].key = key; pairs[probe_position].value = value; } else { pairs[probe_position].value = value; } } void _ht_maybe_grow(HashTable *table) { if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { return; } // Create a new array with 2x capacity. HashTablePair *old_pairs = table->pairs; table->pairs = NULL; array_init(table->pairs, array_cap(old_pairs) * 2); for (size_t i = 0; i < array_cap(table->pairs); i++) { table->pairs[i] = (HashTablePair){NULL, NULL}; } table->shift_amount++; // Hash everything in the table for the new array capacity. for (size_t i = 0; i < array_cap(old_pairs); i++) { if (old_pairs[i].key != NULL) { _ht_insert(table, old_pairs[i].key, old_pairs[i].value); } } // Free old arrays. array_free(old_pairs); } void ht_insert(HashTable *table, void *key, void *value) { _ht_maybe_grow(table); _ht_insert(table, key, value); return; } void * ht_lookup(const HashTable *table, void *key) { size_t position = table->hash_func(table, key); size_t probe_position = position; // Verify the key in that position is the same. If not perform linear // probing to find it. HashTablePair *pairs = table->pairs; while (true) { if (pairs[probe_position].key == NULL) { return NULL; } if (table->eq_func(pairs[probe_position].key, key)) { break; } if (probe_position == array_cap(pairs) - 1) { probe_position = 0; } else { probe_position++; } if (probe_position == position) { return NULL; } } return pairs[probe_position].value; } void ht_free(HashTable *table) { if (table == NULL) { return; } array_free(table->pairs); free(table); } #endif // BDL_HASHTABLE_H