From 6e27b20d10306d53cd838ef375fe80571dfe91ff Mon Sep 17 00:00:00 2001 From: Bad Diode Date: Sun, 24 Oct 2021 11:52:56 +0200 Subject: Add updated hash table with intern key-values --- src/bytecode/compiler.h | 10 ++- src/bytecode/hashtable.h | 215 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 src/bytecode/hashtable.h (limited to 'src/bytecode') diff --git a/src/bytecode/compiler.h b/src/bytecode/compiler.h index d404a5a..ec51942 100755 --- a/src/bytecode/compiler.h +++ b/src/bytecode/compiler.h @@ -33,11 +33,16 @@ has_next_token(const Visitor *visitor) { void emit_constant(Chunk *chunk, Token tok, Object obj) { - // TODO: Should we deduplicate constants? For example why store a number - // more than once instead of reusing the existing index? + size_t prev_size = array_size(chunk->constants); size_t num_idx = add_constant(chunk, obj); add_code(chunk, OP_CONSTANT, tok.line, tok.column); add_code(chunk, num_idx, tok.line, tok.column); + + // If the non value constant was already present we need to properly free + // the memory from the object given to this function. + if (prev_size == array_size(chunk->constants)) { + object_free(obj); + } } void @@ -180,6 +185,7 @@ parse_list(Chunk *chunk, Visitor *vs, Token start) { .line = start.line, .col = start.column, }); + return; } Token tok = next_token(vs); switch (tok.type) { diff --git a/src/bytecode/hashtable.h b/src/bytecode/hashtable.h new file mode 100644 index 0000000..48665d3 --- /dev/null +++ b/src/bytecode/hashtable.h @@ -0,0 +1,215 @@ +#ifndef BDL_HASHTABLE_H +#define BDL_HASHTABLE_H + +#include "darray.h" +#include "objects.h" + +// Minimum table capacity. +#define HT_MIN_CAP 4 +#define HT_MIN_SHIFT 2 + +// Adjust the load factor threshold at which the table will grow on insertion. +#define HT_LOAD_THRESHOLD 0.8 + +typedef struct HashTablePair { + Object *key; + Object *value; +} HashTablePair; + +struct HashTable; +typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes); + +typedef struct HashTable { + // All available key-value pairs as a dynamic array. + HashTablePair *pairs; + + // Internal storage. + Object *keys; + Object *values; + + // Hash function. + HashFunction *hash_func; + + // This table expects the number of buckets to grow in powers of two. To + // speedup the default hashing, we memoize the number of bits equivalent to + // that power of 2: + // + // cap := 1024 = 2 ^ 10, shift_amount := 10 + // + uint8_t shift_amount; +} HashTable; + +// Hash a byte stream using a circular shift + XOR hash function. +static inline uint64_t +_xor_shift_hash(const char *key, size_t n) { + uint64_t hash = 0x65d9d65f6a19574f; + char *last = (char *)key + n; + while (key != last) { + hash ^= (uint64_t)*key++; + hash = (hash << 8) | (hash >> (64 - 8)); + } + return hash; +} + +// Use Fibonacci hashing to map a hash to a value in range of the table. +static inline uint64_t +_fibonacci_hash(uint64_t hash, size_t shift_amount) { + return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); +} + +uint64_t +_symbol_hash(const HashTable *table, void *key) { + Object *obj = key; + uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text)); + hash = _fibonacci_hash(hash, table->shift_amount); + return hash; +} + +static inline float +ht_load_factor(const HashTable *table) { + return (float)array_size(table->pairs) / (float)array_cap(table->pairs); +} + +HashTable * +ht_init(void) { + HashTable *table = malloc(sizeof(HashTable)); + *table = (HashTable){0}; + array_init(table->pairs, HT_MIN_CAP); + array_init(table->keys, HT_MIN_CAP); + array_init(table->values, HT_MIN_CAP); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount = HT_MIN_SHIFT; + table->hash_func = _symbol_hash; + return table; +} + +void +_ht_insert(HashTable *table, Object key, Object value) { + size_t position = table->hash_func(table, &key); + size_t probe_position = position; + + // Verify the key in that position is free. If not, use linear probing to + // find the next free slot. + HashTablePair *pairs = table->pairs; + bool update = false; + while (true) { + if (pairs[probe_position].key == NULL) { + array_head(pairs)->size++; + break; + } + if (object_equal(*pairs[probe_position].key, key)) { + update = true; + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + } + + if (!update) { + array_push(table->keys, object_copy(key)); + array_push(table->values, object_copy(value)); + pairs[probe_position].key = &table->keys[array_size(table->keys) - 1]; + pairs[probe_position].value = &table->values[array_size(table->values) - 1]; + } else { + object_free(*pairs[probe_position].value); + *pairs[probe_position].value = value; + } +} + +void +_ht_maybe_grow(HashTable *table) { + if (ht_load_factor(table) < HT_LOAD_THRESHOLD) { + return; + } + + // Create a new array with 2x capacity. + HashTablePair *old_pairs = table->pairs; + Object *old_keys = table->keys; + Object *old_values = table->values; + table->pairs = NULL; + table->keys = NULL; + table->values = NULL; + array_init(table->pairs, array_cap(old_pairs) * 2); + array_init(table->keys, array_cap(old_pairs) * 2); + array_init(table->values, array_cap(old_pairs) * 2); + for (size_t i = 0; i < array_cap(table->pairs); i++) { + table->pairs[i] = (HashTablePair){NULL, NULL}; + } + table->shift_amount++; + + // Hash everything in the table for the new array capacity. + for (size_t i = 0; i < array_cap(old_pairs); i++) { + if (old_pairs[i].key != NULL) { + _ht_insert(table, *old_pairs[i].key, *old_pairs[i].value); + } + } + + // Free old arrays. + array_free(old_pairs); + for (size_t i = 0; i < array_size(old_keys); i++) { + Object key = old_keys[i]; + Object value = old_values[i]; + object_free(key); + object_free(value); + } + array_free(old_keys); + array_free(old_values); +} + +void +ht_insert(HashTable *table, Object key, Object value) { + _ht_maybe_grow(table); + _ht_insert(table, key, value); + return; +} + +Object * +ht_lookup(const HashTable *table, Object key) { + size_t position = table->hash_func(table, &key); + size_t probe_position = position; + + // Verify the key in that position is the same. If not perform linear + // probing to find it. + HashTablePair *pairs = table->pairs; + while (true) { + if (pairs[probe_position].key == NULL) { + return NULL; + } + if (object_equal(*pairs[probe_position].key, key)) { + break; + } + if (probe_position == array_cap(pairs) - 1) { + probe_position = 0; + } else { + probe_position++; + } + if (probe_position == position) { + return NULL; + } + } + return pairs[probe_position].value; +} + +void +ht_free(HashTable *table) { + if (table == NULL) { + return; + } + array_free(table->pairs); + for (size_t i = 0; i < array_size(table->keys); i++) { + Object key = table->keys[i]; + Object value = table->values[i]; + object_free(key); + object_free(value); + } + array_free(table->keys); + array_free(table->values); + free(table); +} + +#endif // BDL_HASHTABLE_H -- cgit v1.2.1