diff options
Diffstat (limited to 'src/bootstrap/hashtable.h')
-rw-r--r-- | src/bootstrap/hashtable.h | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/src/bootstrap/hashtable.h b/src/bootstrap/hashtable.h new file mode 100644 index 0000000..baffec0 --- /dev/null +++ b/src/bootstrap/hashtable.h | |||
@@ -0,0 +1,158 @@ | |||
1 | #ifndef BDL_HASHTABLE_H | ||
2 | #define BDL_HASHTABLE_H | ||
3 | |||
4 | #include "darray.h" | ||
5 | #include "objects.h" | ||
6 | |||
7 | // Minimum table capacity. | ||
8 | #define HT_MIN_CAP 4 | ||
9 | #define HT_MIN_SHIFT 2 | ||
10 | |||
11 | // Adjust the load factor threshold at which the table will grow on insertion. | ||
12 | #define HT_LOAD_THRESHOLD 0.75 | ||
13 | |||
14 | typedef struct HashTablePair { | ||
15 | Object *key; | ||
16 | Object *value; | ||
17 | } HashTablePair; | ||
18 | |||
19 | typedef struct HashTable { | ||
20 | // All available key-value pairs as a dynamic array. | ||
21 | HashTablePair *pairs; | ||
22 | |||
23 | // This table expects the number of buckets to grow in powers of two. To | ||
24 | // speedup the default hashing, we memoize the number of bits equivalent to | ||
25 | // that power of 2: | ||
26 | // | ||
27 | // cap := 1024 = 2 ^ 10, shift_amount := 10 | ||
28 | // | ||
29 | uint8_t shift_amount; | ||
30 | } HashTable; | ||
31 | |||
32 | // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
33 | static inline uint64_t | ||
34 | _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
35 | return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
36 | } | ||
37 | |||
38 | uint64_t | ||
39 | ht_hash(const HashTable *table, const Object *key) { | ||
40 | uint64_t hash = 0x65d9d65f6a19574f; | ||
41 | // printf("HASHING: "); | ||
42 | // display(key); | ||
43 | // printf("\n"); | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | static inline size_t | ||
48 | ht_load_factor(const HashTable *table) { | ||
49 | return array_size(table->pairs) / array_cap(table->pairs); | ||
50 | } | ||
51 | |||
52 | HashTable * | ||
53 | ht_init(void) { | ||
54 | HashTable *table = malloc(sizeof(HashTable)); | ||
55 | table->pairs = NULL; | ||
56 | array_init(table->pairs, HT_MIN_CAP); | ||
57 | // Clear the table ensuring all references point to NULL. | ||
58 | for (size_t i = 0; i < array_cap(table->pairs); i++) { | ||
59 | table->pairs[i] = (HashTablePair){NULL, NULL}; | ||
60 | } | ||
61 | table->shift_amount = HT_MIN_SHIFT; | ||
62 | return table; | ||
63 | } | ||
64 | |||
65 | void | ||
66 | ht_insert(HashTable *table, const Object *key, const Object *value) { | ||
67 | // TODO: Grow if needed. | ||
68 | size_t position = ht_hash(table, key); | ||
69 | size_t probe_position = position; | ||
70 | |||
71 | // Verify the key in that position is free. If not, use linear probing to | ||
72 | // find the next free slot. | ||
73 | HashTablePair *pairs = table->pairs; | ||
74 | while (true) { | ||
75 | if (pairs[probe_position].key == NULL) { | ||
76 | break; | ||
77 | } | ||
78 | if (obj_eq(pairs[probe_position].key, key)) { | ||
79 | break; | ||
80 | } | ||
81 | if (probe_position == array_cap(pairs)) { | ||
82 | probe_position = 0; | ||
83 | } else { | ||
84 | probe_position++; | ||
85 | } | ||
86 | } | ||
87 | pairs[probe_position].key = (Object *)key; | ||
88 | pairs[probe_position].value = (Object *)value; | ||
89 | return; | ||
90 | } | ||
91 | |||
92 | Object * | ||
93 | ht_lookup(const HashTable *table, const Object *key) { | ||
94 | size_t position = ht_hash(table, key); | ||
95 | size_t probe_position = position; | ||
96 | |||
97 | // Verify the key in that position is the same. If not perform linear | ||
98 | // probing to find it. | ||
99 | HashTablePair *pairs = table->pairs; | ||
100 | while (true) { | ||
101 | if (pairs[probe_position].key == NULL) { | ||
102 | return NULL; | ||
103 | } | ||
104 | if (obj_eq(pairs[probe_position].key, key)) { | ||
105 | break; | ||
106 | } | ||
107 | if (probe_position == array_cap(pairs)) { | ||
108 | probe_position = 0; | ||
109 | } else { | ||
110 | probe_position++; | ||
111 | } | ||
112 | } | ||
113 | return pairs[probe_position].value; | ||
114 | } | ||
115 | |||
116 | void | ||
117 | ht_debug(HashTable *table) { | ||
118 | HashTablePair *pairs = table->pairs; | ||
119 | for (size_t i = 0; i < array_cap(pairs); i++) { | ||
120 | printf("i: %ld ", i); | ||
121 | if (pairs[i].key == NULL) { | ||
122 | printf("EMPTY\n"); | ||
123 | } else { | ||
124 | printf("key: "); | ||
125 | display(pairs[i].key); | ||
126 | printf(" value: "); | ||
127 | display(pairs[i].value); | ||
128 | printf("\n"); | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | |||
133 | // // Use Fibonacci hashing to map a hash to a value in range of the table. | ||
134 | // static inline | ||
135 | // uint64_t _fibonacci_hash(uint64_t hash, size_t shift_amount) { | ||
136 | // return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount); | ||
137 | // } | ||
138 | |||
139 | // // Hash a null terminated string using a circular shift + XOR hash function. | ||
140 | // static inline | ||
141 | // uint64_t _string_hash(const HashStash *table, const char *key) { | ||
142 | // uint64_t hash = 0x65d9d65f6a19574f; | ||
143 | // while (*key) { | ||
144 | // hash ^= (uint64_t)*key++; | ||
145 | // hash = (hash << 8) | (hash >> (64 - 8)); | ||
146 | // } | ||
147 | // return _fibonacci_hash(hash, table->shift_amount); | ||
148 | // } | ||
149 | |||
150 | // // Return the key as an unsigned integer. | ||
151 | // static inline | ||
152 | // uint64_t _number_hash(const HashStash *table, const char *key) { | ||
153 | // uint64_t hash = 0; | ||
154 | // memcpy(&hash, key, table->key_length); | ||
155 | // return _fibonacci_hash(hash, table->shift_amount); | ||
156 | // } | ||
157 | |||
158 | #endif // BDL_HASHTABLE_H | ||