#ifndef BDL_HASHTABLE_H #define BDL_HASHTABLE_H #include "darray.h" #include "objects.h" // Minimum table capacity. #define HT_MIN_CAP 4 #define HT_MIN_SHIFT 2 // Adjust the load factor threshold at which the table will grow on insertion. #define HT_LOAD_THRESHOLD 0.8 typedef struct HashTablePair { Object *key; Object *value; } HashTablePair; typedef struct HashTable { // All available key-value pairs as a dynamic array. HashTablePair *pairs; // This table expects the number of buckets to grow in powers of two. To // speedup the default hashing, we memoize the number of bits equivalent to // that power of 2: // // cap := 1024 = 2 ^ 10, shift_amount := 10 // uint8_t shift_amount; } HashTable; // Hash a byte stream using a circular shift + XOR hash function. static inline uint64_t _xor_shift_hash(const char *key, size_t n) { uint64_t hash = 0x65d9d65f6a19574f; char *last = (char *)key + n; while (key != last) { hash ^= (uint64_t)*key++; hash = (hash << 8) | (hash >> (64 - 8)); } return hash; } // Use Fibonacci hashing to map a hash to a value in range of the table. static inline uint64_t _fibonacci_hash(uint64_t hash, size_t shift_amount) { return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); } uint64_t ht_hash(const HashTable *table, const Object *key) { uint64_t hash = 0; switch (key->type) { case OBJ_TYPE_FIXNUM: { hash = key->fixnum; } break; case OBJ_TYPE_STRING: { hash = _xor_shift_hash(key->string, array_size(key->string)); } break; case OBJ_TYPE_SYMBOL: { hash = _xor_shift_hash(key->symbol, array_size(key->symbol)); } break; case OBJ_TYPE_BOOL: case OBJ_TYPE_NIL: case OBJ_TYPE_PAIR: case OBJ_TYPE_LAMBDA: case OBJ_TYPE_PROCEDURE: case OBJ_TYPE_ERR: { hash = (uintptr_t)key; } break; } hash = _fibonacci_hash(hash, table->shift_amount); return hash; } static inline float ht_load_factor(const HashTable *table) { return (float)array_size(table->pairs) / (float)array_cap(table->pairs); } HashTable * ht_init(void) { HashTable *table = malloc(sizeof(HashTable)); table->pairs = NULL; array_init(table->pairs, HT_MIN_CAP); for (size_t i = 0; i < array_cap(table->pairs); i++) { table->pairs[i] = (HashTablePair){NULL, NULL}; } table->shift_amount = HT_MIN_SHIFT; return table; } void _ht_insert(HashTable *table, const Object *key, const Object *value) { size_t position = ht_hash(table, key); size_t probe_position = position; // Verify the key in that position is free. If not, use linear probing to // find the next free slot. HashTablePair *pairs = table->pairs; while (true) { if (pairs[probe_position].key == NULL) { array_head(pairs)->size++; break; } if (obj_eq(pairs[probe_position].key, key)) { break; } if (probe_position == array_cap(pairs) - 1) { probe_position = 0; } else { probe_position++; } } pairs[probe_position].key = (Object *)key; pairs[probe_position].value = (Object *)value; } void _ht_maybe_grow(HashTable *table) { HashTablePair *pairs = table->pairs; if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { return; } // Create a new array with 2x capacity. table->pairs = NULL; array_init(table->pairs, array_cap(pairs) * 2); for (size_t i = 0; i < array_cap(table->pairs); i++) { table->pairs[i] = (HashTablePair){NULL, NULL}; } table->shift_amount++; // Hash everything in the table for the new array capacity. for (size_t i = 0; i < array_cap(pairs); i++) { if (pairs[i].key != NULL) { _ht_insert(table, pairs[i].key, pairs[i].value); } } // Free the old array. array_free(pairs); } void ht_insert(HashTable *table, const Object *key, const Object *value) { _ht_maybe_grow(table); _ht_insert(table, key, value); return; } Object * ht_lookup(const HashTable *table, const Object *key) { size_t position = ht_hash(table, key); size_t probe_position = position; // Verify the key in that position is the same. If not perform linear // probing to find it. HashTablePair *pairs = table->pairs; while (true) { if (pairs[probe_position].key == NULL) { return NULL; } if (obj_eq(pairs[probe_position].key, key)) { break; } if (probe_position == array_cap(pairs) - 1) { probe_position = 0; } else { probe_position++; } if (probe_position == position) { return NULL; } } return pairs[probe_position].value; } void ht_free(HashTable *table) { if (table == NULL) { return; } if (table->pairs == NULL) { return; } array_free(table->pairs); free(table); } #endif // BDL_HASHTABLE_H