about summary refs log tree commit diff
path: root/scratch/facebook/polynomial-rolling-hash.py
diff options
context:
space:
mode:
authorWilliam Carroll <wpcarro@gmail.com>2020-11-16T00·35+0000
committerWilliam Carroll <wpcarro@gmail.com>2020-11-16T00·35+0000
commit30f4d6f4a40bfae6bc15332a4e614d40d71fedb4 (patch)
tree46c871c0b8ff038e2fa866a8efed8dfb64fc7f52 /scratch/facebook/polynomial-rolling-hash.py
parent363519273a0e2e7f3e6d63e8d2017b25f480eac9 (diff)
Implement a simple hash function and hash table
I was always curious how hashing functions were implemented, so I read about the
"polynomial rolling hash function", and I decided implementing it would be a
good exercise. After writing that, writing a hash table was simple.
Diffstat (limited to 'scratch/facebook/polynomial-rolling-hash.py')
-rw-r--r--scratch/facebook/polynomial-rolling-hash.py59
1 files changed, 59 insertions, 0 deletions
diff --git a/scratch/facebook/polynomial-rolling-hash.py b/scratch/facebook/polynomial-rolling-hash.py
new file mode 100644
index 000000000000..ba288d631ba7
--- /dev/null
+++ b/scratch/facebook/polynomial-rolling-hash.py
@@ -0,0 +1,59 @@
+def compute_hash(x):
+    """
+    Compute a unique fingerprint for the string input, `x`, as an integer using
+    the following equation:
+
+    x[0] * P^0 + x[1] * P^1 + ... x[n-1] * P^(n-1) % M
+
+    P and M are constants.
+    """
+    p = 31 # small prime number
+    m = int(10e9) + 9 # large prime number
+    power = 0
+    result = 0
+    for c in x:
+        result += ord(c) * p**power
+        power += 1
+    return result % m
+
+class HashTable(object):
+    def __init__(self, size):
+        buckets = []
+        for _ in range(size):
+            buckets.append([])
+        self.xs = buckets
+        self.compute_hash = lambda k: compute_hash(k) % size
+
+    def __repr__(self):
+        result = []
+        for bucket in self.xs:
+            for entry in bucket:
+                result.append(entry)
+        return "HashTable({})".format(",".join(str(x) for x in result))
+
+    def get(self, key):
+        """
+        Attempt to retrieve value stored under `key`.
+        """
+        h = self.compute_hash(key)
+        for k, v in self.xs[h]:
+            if k == key:
+                return v
+        return None
+
+    def put(self, key, val):
+        """
+        Set `key` to `val`; update value at `key` if it already exists.
+        """
+        h = self.compute_hash(key)
+        for i in range(len(self.xs[h])):
+            # Update entry if the key exists...
+            if self.xs[h][i][0] == key:
+                self.xs[h][i] = (key, val)
+                return None
+        # ...create a new entry otherwise
+        self.xs[h].append((key, val))
+
+    def delete(self, key):
+        h = self.compute_hash(key)
+        self.xs[h] = list(filter(lambda entry: entry[0] != key, self.xs[h]))