about summary refs log tree commit diff
path: root/users/wpcarro/scratch/facebook/polynomial-rolling-hash.py
diff options
context:
space:
mode:
Diffstat (limited to 'users/wpcarro/scratch/facebook/polynomial-rolling-hash.py')
-rw-r--r--users/wpcarro/scratch/facebook/polynomial-rolling-hash.py72
1 files changed, 72 insertions, 0 deletions
diff --git a/users/wpcarro/scratch/facebook/polynomial-rolling-hash.py b/users/wpcarro/scratch/facebook/polynomial-rolling-hash.py
new file mode 100644
index 000000000000..0c7b7cb5a0c4
--- /dev/null
+++ b/users/wpcarro/scratch/facebook/polynomial-rolling-hash.py
@@ -0,0 +1,72 @@
+def compute_hash(x):
+    """
+    Compute a unique fingerprint for the string input, `x`, as an integer using
+    the following equation:
+
+    x[0] * P^0 + x[1] * P^1 + ... x[n-1] * P^(n-1) % M
+
+    P and M are constants where P represents the next available prime number
+    that's GTE the number of unique characters you'll be hashing. In the case of
+    all lowercase characters, of which there are 26, the next available prime
+    number is 31.
+    """
+    p = 31
+    m = int(10e9) + 9 # large prime number
+    power = 0
+    result = 0
+    for c in x:
+        result += ord(c) * p**power
+        power += 1
+    return result % m
+
+class HashTable(object):
+    def __init__(self, size):
+        """
+        Create a hash table with `size` buckets.
+        """
+        buckets = []
+        for _ in range(size):
+            buckets.append([])
+        self.xs = buckets
+        self.compute_hash = lambda k: compute_hash(k) % size
+
+    def __repr__(self):
+        result = []
+        for bucket in self.xs:
+            for entry in bucket:
+                result.append(entry)
+        return "HashTable({})".format(",".join(str(x) for x in result))
+
+    def get(self, key):
+        """
+        Attempt to retrieve value stored under `key`.
+        """
+        h = self.compute_hash(key)
+        for k, v in self.xs[h]:
+            if k == key:
+                return v
+        return None
+
+    def put(self, key, val):
+        """
+        Set `key` to `val`; update value at `key` if it already exists.
+        """
+        h = self.compute_hash(key)
+        for i in range(len(self.xs[h])):
+            # Update entry if the key exists...
+            if self.xs[h][i][0] == key:
+                self.xs[h][i] = (key, val)
+                return None
+        # ...create a new entry otherwise
+        self.xs[h].append((key, val))
+
+    def delete(self, key):
+        """
+        Remove entry `key` from the hash table.
+        """
+        h = self.compute_hash(key)
+        for i in range(len(self.xs[h])):
+            k, v = self.xs[h][i]
+            if k == key:
+                self.xs[h].remove((k, v))
+                return