about summary refs log tree commit diff
diff options
context:
space:
mode:
authorWilliam Carroll <wpcarro@gmail.com>2020-11-20T21·59+0000
committerWilliam Carroll <wpcarro@gmail.com>2020-11-20T21·59+0000
commit847aad2a146583b21377a2c748b01f9a2df591a5 (patch)
treec910074589d44c85a64713021fc9c8e519763a4a
parentf652ea0be6cb6ea4340971ed06852f071b8519a4 (diff)
Implement the Levenstein "edit distance" algorithm
This is the mother of dynamic programming algorithms in my opinion. It computes
the minimal "edit distance" between two input strings where an edit is
considered one of:
  - inserting a character into `a`
  - deleting a character from `a`
  - substituting a character in `a` with a character from `b`

It took me awhile to grok the algorithm, but I implemented this from my
understanding of something that I read ~3 nights prior, so I must've understood
what I read. Good news!
-rw-r--r--scratch/facebook/edit-distance.py47
1 files changed, 47 insertions, 0 deletions
diff --git a/scratch/facebook/edit-distance.py b/scratch/facebook/edit-distance.py
new file mode 100644
index 000000000000..a5b744f30f27
--- /dev/null
+++ b/scratch/facebook/edit-distance.py
@@ -0,0 +1,47 @@
+def print_grid(grid):
+    result = []
+    for row in grid:
+        result.append(" ".join(str(c) for c in row))
+    return print("\n".join(result))
+
+def edit_distance(a, b):
+    """
+    Compute the "edit distance" to transform string `a` into string `b`.
+    """
+    grid = []
+    for row in range(len(a) + 1):
+        r = []
+        for col in range(len(b) + 1):
+            r.append(0)
+        grid.append(r)
+
+    # left-to-right
+    # populate grid[0][i]
+    for col in range(len(grid[0])):
+        grid[0][col] = col
+
+    # top-to-bottom
+    # populate grid[i][0]
+    for row in range(len(grid)):
+        grid[row][0] = row
+
+    for row in range(1, len(grid)):
+        for col in range(1, len(grid[row])):
+            # last characters are the same
+            if a[0:row][-1] == b[0:col][-1]:
+                grid[row][col] = grid[row - 1][col - 1]
+            else:
+                # substitution
+                s = 1 + grid[row - 1][col - 1]
+                # deletion
+                d = 1 + grid[row - 1][col]
+                # insertion
+                i = 1 + grid[row][col - 1]
+                grid[row][col] = min(s, d, i)
+    print_grid(grid)
+    return grid[-1][-1]
+
+result = edit_distance("pizza", "pisa")
+print(result)
+assert result == 2
+print("Success!")