diff options
author | William Carroll <wpcarro@gmail.com> | 2020-11-19T00·35+0000 |
---|---|---|
committer | William Carroll <wpcarro@gmail.com> | 2020-11-19T00·35+0000 |
commit | 1088e4143df5292eee487df17c350563a7d31fd0 (patch) | |
tree | c4b612040a690d6c88129c5a8dbd51efb5eed675 /scratch/facebook/hard/suffix-tree.py | |
parent | c0268ed31a8049ce75a7e61737b8e520e7d5403e (diff) |
Implement a suffix tree
While it took me awhile to implement, this exercise was definitely worth doing. I think there should be a more elegant way to construct the tree using maybe a stack, but I couldn't find it. All of this was part of a larger effort to search a string for a variety of patterns. The solution is to compile the string into a suffix tree and then search the suffix tree for each of the patterns. I'm glad I didn't gloss over this exercise.
Diffstat (limited to 'scratch/facebook/hard/suffix-tree.py')
-rw-r--r-- | scratch/facebook/hard/suffix-tree.py | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/scratch/facebook/hard/suffix-tree.py b/scratch/facebook/hard/suffix-tree.py new file mode 100644 index 000000000000..600d06591bdb --- /dev/null +++ b/scratch/facebook/hard/suffix-tree.py @@ -0,0 +1,64 @@ +import random + +def exists(pattern, tree): + """ + Return true if `pattern` exists in `tree`. + """ + if len(pattern) == 0: + return True + if len(pattern) == 1: + for branch in tree: + if branch[0] == pattern[0]: + return True + return False + for branch in tree: + if branch[0] == pattern[0]: + return exists(pattern[1:], branch[1]) + return False + +# Branch :: (Char, [Branch]) +# SuffixTree :: [Branch] + +def suffix_tree(xs): + """ + Create a suffix tree from the input string, `xs`. + """ + root = [] + for i in range(len(xs)): + curr = xs[i:] + parent = root + for c1 in curr: + grafted = False + for c2, children in parent: + if c1 == c2: + grafted = True + parent = children + if grafted: + continue + else: + children = [] + child = (c1, children) + parent.append(child) + parent = children + return root + + +################################################################################ +# Tests +################################################################################ + +x = random.choice(["burrito", "pizza", "guacamole"]) +tree = suffix_tree(x) +for branch in tree: + print(branch) + +for _ in range(3): + n = len(x) + i, j = random.randint(0, n), random.randint(0, n) + pattern = x[min(i, j):max(i, j)] + print("Checking \"{}\" for \"{}\" ...".format(x, pattern)) + print("Result: {}".format(exists(pattern, tree))) + pattern = random.choice(["foo", "bar", "baz"]) + print("Checking \"{}\" for \"{}\" ...".format(x, pattern)) + print("Result: {}".format(exists(pattern, tree))) + print() |