diff options
author | William Carroll <wpcarro@gmail.com> | 2020-11-20T21·59+0000 |
---|---|---|
committer | William Carroll <wpcarro@gmail.com> | 2020-11-20T21·59+0000 |
commit | 847aad2a146583b21377a2c748b01f9a2df591a5 (patch) | |
tree | c910074589d44c85a64713021fc9c8e519763a4a /scratch | |
parent | f652ea0be6cb6ea4340971ed06852f071b8519a4 (diff) |
Implement the Levenstein "edit distance" algorithm
This is the mother of dynamic programming algorithms in my opinion. It computes the minimal "edit distance" between two input strings where an edit is considered one of: - inserting a character into `a` - deleting a character from `a` - substituting a character in `a` with a character from `b` It took me awhile to grok the algorithm, but I implemented this from my understanding of something that I read ~3 nights prior, so I must've understood what I read. Good news!
Diffstat (limited to 'scratch')
-rw-r--r-- | scratch/facebook/edit-distance.py | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/scratch/facebook/edit-distance.py b/scratch/facebook/edit-distance.py new file mode 100644 index 000000000000..a5b744f30f27 --- /dev/null +++ b/scratch/facebook/edit-distance.py @@ -0,0 +1,47 @@ +def print_grid(grid): + result = [] + for row in grid: + result.append(" ".join(str(c) for c in row)) + return print("\n".join(result)) + +def edit_distance(a, b): + """ + Compute the "edit distance" to transform string `a` into string `b`. + """ + grid = [] + for row in range(len(a) + 1): + r = [] + for col in range(len(b) + 1): + r.append(0) + grid.append(r) + + # left-to-right + # populate grid[0][i] + for col in range(len(grid[0])): + grid[0][col] = col + + # top-to-bottom + # populate grid[i][0] + for row in range(len(grid)): + grid[row][0] = row + + for row in range(1, len(grid)): + for col in range(1, len(grid[row])): + # last characters are the same + if a[0:row][-1] == b[0:col][-1]: + grid[row][col] = grid[row - 1][col - 1] + else: + # substitution + s = 1 + grid[row - 1][col - 1] + # deletion + d = 1 + grid[row - 1][col] + # insertion + i = 1 + grid[row][col - 1] + grid[row][col] = min(s, d, i) + print_grid(grid) + return grid[-1][-1] + +result = edit_distance("pizza", "pisa") +print(result) +assert result == 2 +print("Success!") |