From af3f2ebc42bf9dc64fc581b9f8f79e98895cb417 Mon Sep 17 00:00:00 2001
From: Bad Diode <bd@badd10de.dev>
Date: Sat, 30 Oct 2021 10:13:11 +0200
Subject: Add hashtable for Environment tracking

---
 src/hashtable.h | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.c    |  53 +++++++++++++++++
 src/parser.h    |   7 +++
 3 files changed, 240 insertions(+)
 create mode 100644 src/hashtable.h

(limited to 'src')

diff --git a/src/hashtable.h b/src/hashtable.h
new file mode 100644
index 0000000..faa8591
--- /dev/null
+++ b/src/hashtable.h
@@ -0,0 +1,180 @@
+#ifndef BDL_HASHTABLE_H
+#define BDL_HASHTABLE_H
+
+#include "darray.h"
+
+// Minimum table capacity.
+#define HT_MIN_CAP   4
+#define HT_MIN_SHIFT 2
+
+// Adjust the load factor threshold at which the table will grow on insertion.
+#define HT_LOAD_THRESHOLD 0.8
+
+typedef struct HashTablePair {
+    void *key;
+    void *value;
+} HashTablePair;
+
+struct HashTable;
+typedef uint64_t (HashFunction)(const struct HashTable *table, void *bytes);
+typedef bool (EqFunc)(void *a, void *b);
+
+typedef struct HashTable {
+    // All available key-value pairs as a dynamic array.
+    HashTablePair *pairs;
+
+    // Hash function.
+    HashFunction *hash_func;
+
+    // Equality function.
+    EqFunc *eq_func;
+
+    // This table expects the number of buckets to grow in powers of two. To
+    // speedup the default hashing, we memoize the number of bits equivalent to
+    // that power of 2:
+    //
+    //     cap := 1024 = 2 ^ 10, shift_amount := 10
+    //
+    uint8_t shift_amount;
+} HashTable;
+
+// Hash a byte stream using a circular shift + XOR hash function.
+static inline uint64_t
+_xor_shift_hash(const char *key, size_t n) {
+    uint64_t hash = 0x65d9d65f6a19574f;
+    char *last = (char *)key + n;
+    while (key != last) {
+        hash ^= (uint64_t)*key++;
+        hash = (hash << 8) | (hash >> (64 - 8));
+    }
+    return hash;
+}
+
+// Use Fibonacci hashing to map a hash to a value in range of the table.
+static inline uint64_t
+_fibonacci_hash(uint64_t hash, size_t shift_amount) {
+    return (hash * UINT64_C(11400714819323198485)) >> (64 - shift_amount);
+}
+
+static inline float
+ht_load_factor(const HashTable *table) {
+    return (float)array_size(table->pairs) / (float)array_cap(table->pairs);
+}
+
+HashTable *
+ht_init(HashFunction *hash_func, EqFunc *eq_func) {
+    HashTable *table = malloc(sizeof(HashTable));
+    *table = (HashTable){0};
+    array_init(table->pairs, HT_MIN_CAP);
+    for (size_t i = 0; i < array_cap(table->pairs); i++) {
+        table->pairs[i] = (HashTablePair){NULL, NULL};
+    }
+    table->shift_amount = HT_MIN_SHIFT;
+    table->hash_func = hash_func;
+    table->eq_func = eq_func;
+    return table;
+}
+
+void
+_ht_insert(HashTable *table, void *key, void *value) {
+    size_t position = table->hash_func(table, key);
+    size_t probe_position = position;
+
+    // Verify the key in that position is free. If not, use linear probing to
+    // find the next free slot.
+    HashTablePair *pairs = table->pairs;
+    bool update = false;
+    while (true) {
+        if (pairs[probe_position].key == NULL) {
+            array_head(pairs)->size++;
+            break;
+        }
+        if (table->eq_func(pairs[probe_position].key, key)) {
+            update = true;
+            break;
+        }
+        if (probe_position == array_cap(pairs) - 1) {
+            probe_position = 0;
+        } else {
+            probe_position++;
+        }
+    }
+
+    if (!update) {
+        pairs[probe_position].key = key;
+        pairs[probe_position].value = value;
+    } else {
+        pairs[probe_position].value = value;
+    }
+}
+
+void
+_ht_maybe_grow(HashTable *table) {
+    if (ht_load_factor(table) < HT_LOAD_THRESHOLD) {
+        return;
+    }
+
+    // Create a new array with 2x capacity.
+    HashTablePair *old_pairs = table->pairs;
+    table->pairs = NULL;
+    array_init(table->pairs, array_cap(old_pairs) * 2);
+    for (size_t i = 0; i < array_cap(table->pairs); i++) {
+        table->pairs[i] = (HashTablePair){NULL, NULL};
+    }
+    table->shift_amount++;
+
+    // Hash everything in the table for the new array capacity.
+    for (size_t i = 0; i < array_cap(old_pairs); i++) {
+        if (old_pairs[i].key != NULL) {
+            _ht_insert(table, old_pairs[i].key, old_pairs[i].value);
+        }
+    }
+
+    // Free old arrays.
+    array_free(old_pairs);
+}
+
+void
+ht_insert(HashTable *table, void *key, void *value) {
+    _ht_maybe_grow(table);
+    _ht_insert(table, key, value);
+    return;
+}
+
+void *
+ht_lookup(const HashTable *table, void *key) {
+    size_t position = table->hash_func(table, key);
+    size_t probe_position = position;
+
+    // Verify the key in that position is the same. If not perform linear
+    // probing to find it.
+    HashTablePair *pairs = table->pairs;
+    while (true) {
+        if (pairs[probe_position].key == NULL) {
+            return NULL;
+        }
+        if (table->eq_func(pairs[probe_position].key, key)) {
+            break;
+        }
+        if (probe_position == array_cap(pairs) - 1) {
+            probe_position = 0;
+        } else {
+            probe_position++;
+        }
+        if (probe_position == position) {
+            return NULL;
+        }
+    }
+    return pairs[probe_position].value;
+}
+
+void
+ht_free(HashTable *table) {
+    if (table == NULL) {
+        return;
+    }
+    array_free(table->pairs);
+    free(table);
+}
+
+#endif // BDL_HASHTABLE_H
diff --git a/src/parser.c b/src/parser.c
index 2215fad..e16fc4a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -4,6 +4,15 @@
 static Object **objects = NULL;
 static Root *roots = NULL;
 
+
+uint64_t
+symbol_hash(const HashTable *table, void *key) {
+    Object *obj = key;
+    uint64_t hash = _xor_shift_hash(obj->text, array_size(obj->text));
+    hash = _fibonacci_hash(hash, table->shift_amount);
+    return hash;
+}
+
 Token
 peek_token(const Parser *parser) {
     return parser->tokens[parser->current];
@@ -391,6 +400,16 @@ parse(Token *tokens, Errors *errors) {
         array_push(roots, root);
     }
 
+    // TEST INSERTION/LOOKUP.
+    HashTable *table = ht_init(symbol_hash, object_equal);
+    ht_insert(table, &roots[0][0], &roots[1][0]);
+    ht_insert(table, &roots[2][0], &roots[3][0]);
+    Object *val = ht_lookup(table, &roots[2][0]);
+    object_display(val);
+    val = ht_lookup(table, &roots[0][0]);
+    object_display(val);
+    ht_free(table);
+
     // Perform semantic analysis.
     // TODO: Check that symbols are defined before usage.
     // TODO: Remove unnecessary statements.
@@ -522,3 +541,37 @@ object_display(Object *obj) {
     }
     return;
 }
+
+bool
+object_equal(Object *a, Object *b) {
+    if (a == NULL || b == NULL || a->type != b->type) {
+        return false;
+    }
+    switch (a->type) {
+        case OBJ_TYPE_TRUE:
+        case OBJ_TYPE_FALSE: {
+            return true;
+        } break;
+        case OBJ_TYPE_FIXNUM: {
+            return a->fixnum == b->fixnum;
+        } break;
+        case OBJ_TYPE_SYMBOL:
+        case OBJ_TYPE_STRING: {
+            if (array_size(a->text) != array_size(b->text)) {
+                return false;
+            }
+            for (size_t i = 0; i < array_size(a->text); i++) {
+                if (a->text[i] != b->text[i]) {
+                    return false;
+                }
+            }
+        } break;
+        case OBJ_TYPE_LAMBDA: {
+            // return a->closure == b.closure;
+        } break;
+        default: {
+            return false;
+        } break;
+    }
+    return true;
+}
diff --git a/src/parser.h b/src/parser.h
index 14d9df5..d1eddc7 100755
--- a/src/parser.h
+++ b/src/parser.h
@@ -2,6 +2,7 @@
 #define BDL_PARSER_H
 
 #include "lexer.h"
+#include "hashtable.h"
 
 typedef enum ObjectType {
     OBJ_TYPE_NIL,
@@ -58,6 +59,11 @@ typedef struct Object {
     size_t col;
 } Object;
 
+typedef struct Environment {
+    HashTable *table;
+    struct Environment *parent;
+};
+
 typedef struct Parser {
     Token *tokens;
     size_t current;
@@ -86,6 +92,7 @@ Root * parse(Token *tokens, Errors *errors);
 
 // Object operations.
 void object_display(Object *obj);
+bool object_equal(Object *a, Object *b);
 
 // Manage resources.
 Object * object_alloc(Token tok, ObjectType type);
-- 
cgit v1.2.1