subtree(users/wpcarro): docking briefcase at '24f5a642' r/3226

git-subtree-dir: users/wpcarro git-subtree-mainline: 464bbcb15c09813172c79820bcf526bb10cf4208 git-subtree-split: 24f5a642af3aa1627bbff977f0a101907a02c69f Change-Id: I6105b3762b79126b3488359c95978cadb3efa789
author: Vincent Ambo <mail@tazj.in> 2021-12-13T22·51+0300
committer: Vincent Ambo <mail@tazj.in> 2021-12-13T23·15+0300
commit: 019f8fd2113df4c5247c3969c60fd4f0e08f91f7 (patch)
tree: 76a857f61aa88f62a30e854651e8439db77fd0ea /users/wpcarro/scratch/facebook/evaluator.py
parent: 464bbcb15c09813172c79820bcf526bb10cf4208 (diff)
parent: 6123e976928ca3d8d93f0b2006b10b5f659eb74d (diff)
1 files changed, 234 insertions, 0 deletions
diff --git a/users/wpcarro/scratch/facebook/evaluator.py b/users/wpcarro/scratch/facebook/evaluator.py
new file mode 100644
index 000000000000..14deb66a8f65
--- /dev/null
+++ b/users/wpcarro/scratch/facebook/evaluator.py
@@ -0,0 +1,234 @@
+# After stumbling through my first technical screen, I'm going to drill
+# algorithms for implementing evaluators for a toy expression language:
+# e.g. 2 + 13 * 3 + 5 * 2
+#
+# As of now, I'm aware of a few algorithms for solving this:
+#   - DONE: Convert infix expression to Polish notation and evaluate the Polish
+#     notation.
+#   - DONE: Evaluate the tokens using two stacks and avoid converting it.
+#   - DONE: Create a tree of depth two to encode the operator precedence and
+#     evaluate that AST.
+#   - TODO: Convert the infix expression to a prefix expression
+#   - TODO: Write a recursive descent parser and evaluate the AST.
+
+operators = {
+    '*': 1,
+    '+': 0,
+}
+
+def tokenize(xs):
+    result = []
+    i = 0
+    while i < len(xs):
+        current = xs[i]
+        if current == ' ':
+            i += 1
+            continue
+        elif current in operators.keys():
+            result.append(current)
+            i += 1
+        else:
+            i += 1
+            while i < len(xs) and xs[i] in {str(n) for n in range(10)}:
+                current += xs[i]
+                i += 1
+            result.append(int(current))
+    return result
+
+# Convert infix to postfix; evaluate postfix
+# I believe this is known as the Shunting-Yards algorithm
+def postfix(tokens):
+    result = []
+    s = []
+    for token in tokens:
+        if type(token) == int:
+            result.append(token)
+        else:
+            while s and operators[token] < operators[s[-1]]:
+                result.append(s.pop())
+            s.append(token)
+    while s:
+        result.append(s.pop())
+    return result
+
+def do_evaluate_with_polish_notation(tokens):
+    s = []
+    for token in tokens:
+        if token == '*':
+            s.append(s.pop() * s.pop())
+        elif token == '+':
+            s.append(s.pop() + s.pop())
+        else:
+            s.append(token)
+    return s[-1]
+
+def evaluate_with_polish_notation(expr):
+    tokens = tokenize(expr)
+    print("Tokens:  {}".format(tokens))
+    pn = postfix(tokens)
+    print("Postfix: {}".format(pn))
+    result = do_evaluate_with_polish_notation(pn)
+    print("Result:  {}".format(result))
+    return result
+
+# Evaluate Tokens
+
+def apply_operator(op, a, b):
+    if op == '*':
+        return a * b
+    elif op == '+':
+        return a + b
+
+def do_evaluate_tokens(tokens):
+    vals = []
+    ops = []
+    for token in tokens:
+        if type(token) == int:
+            vals.append(token)
+        elif token == '*':
+            ops.append(token)
+        elif token == '+':
+            while ops and operators[token] < operators[ops[-1]]:
+                vals.append(apply_operator(ops.pop(), vals.pop(), vals.pop()))
+            ops.append(token)
+        else:
+            raise Exception("Unexpected token: {}".format(token))
+    while ops:
+        vals.append(apply_operator(ops.pop(), vals.pop(), vals.pop()))
+    return vals[-1]
+
+def evaluate_tokens(expr):
+    tokens = tokenize(expr)
+    print("Tokens:  {}".format(tokens))
+    result = do_evaluate_tokens(tokens)
+    print("Result:  {}".format(result))
+    return result
+
+# Ad Hoc Tree
+
+def parse(tokens):
+    result = []
+    series = []
+    for token in tokens:
+        if type(token) == int:
+            series.append(token)
+        elif token == '*':
+            continue
+        elif token == '+':
+            result.append(series)
+            series = []
+        else:
+            raise Exception("Unexpected token: {}".format(token))
+    result.append(series)
+    return result
+
+def product(xs):
+    result = 1
+    for x in xs:
+        result *= x
+    return result
+
+def do_evaluate_ad_hoc_tree(ast):
+    return sum([product(xs) for xs in ast])
+
+def evaluate_ad_hoc_tree(expr):
+    tokens = tokenize(expr)
+    print("Tokens:  {}".format(tokens))
+    ast = parse(tokens)
+    print("AST:     {}".format(ast))
+    result = do_evaluate_ad_hoc_tree(ast)
+    print("Result:  {}".format(result))
+    return result
+
+# Recursive Descent Parser
+
+# expression     -> addition ;
+# addition       -> multiplication ( "+" multiplication )* ;
+# multiplication -> terminal ( "*" terminal )* ;
+# terminal       -> NUMBER ;
+
+class Parser(object):
+    def __init__(self, tokens):
+        self.tokens = tokens
+        self.i = 0
+
+    # mutations
+    def advance(self):
+        self.i += 1
+
+    def consume(self):
+        result = self.curr()
+        self.advance()
+        return result
+
+    # predicates
+    def match(self, x):
+        if self.curr() == x:
+            self.advance()
+            return True
+        return False
+
+    def tokens_available(self):
+        return self.i < len(self.tokens)
+
+    # getters
+    def prev(self):
+        return self.tokens[self.i - 1]
+
+    def curr(self):
+        return self.tokens[self.i] if self.tokens_available() else None
+
+    def next(self):
+        return self.tokens[self.i + 1]
+
+def parse_expression(tokens):
+    parser = Parser(tokens)
+    return parse_addition(parser)
+
+def parse_addition(parser):
+    result = parse_multiplication(parser)
+    while parser.match("+"):
+        op = parser.prev()
+        rhs = parse_multiplication(parser)
+        result = ["+", result, rhs]
+    return result
+
+def parse_multiplication(parser):
+    result = parse_terminal(parser)
+    while parser.match("*"):
+        op = parser.prev()
+        rhs = parse_terminal(parser)
+        result = ["*", result, rhs]
+    return result
+
+def parse_terminal(parser):
+    # If we reach here, the current token *must* be a number.
+    return parser.consume()
+
+def evaluate_ast(ast):
+    if type(ast) == int:
+        return ast
+    else:
+        op, lhs, rhs = ast[0], ast[1], ast[2]
+        return apply_operator(op, evaluate_ast(lhs), evaluate_ast(rhs))
+
+def evaluate_recursive_descent(expr):
+    tokens = tokenize(expr)
+    print("Tokens:  {}".format(tokens))
+    ast = parse_expression(tokens)
+    print("AST:     {}".format(ast))
+    result = evaluate_ast(ast)
+    return result
+
+methods = {
+    'Polish Notation': evaluate_with_polish_notation,
+    'Evaluate Tokens': evaluate_tokens,
+    'Ad Hoc Tree': evaluate_ad_hoc_tree,
+    'Recursive Descent': evaluate_recursive_descent,
+}
+
+for name, fn in methods.items():
+    expr = "13 + 2 * 4 + 7 + 3 * 8"
+    print("Evaluating \"{}\" using the \"{}\" method...".format(expr, name))
+    assert fn(expr) == eval(expr)
+    print("Success!")
author	Vincent Ambo <mail@tazj.in>	2021-12-13T22·51+0300
committer	Vincent Ambo <mail@tazj.in>	2021-12-13T23·15+0300
commit	019f8fd2113df4c5247c3969c60fd4f0e08f91f7 (patch)
tree	76a857f61aa88f62a30e854651e8439db77fd0ea /users/wpcarro/scratch/facebook/evaluator.py
parent	464bbcb15c09813172c79820bcf526bb10cf4208 (diff)
parent	6123e976928ca3d8d93f0b2006b10b5f659eb74d (diff)