diff options
Diffstat (limited to 'scratch/facebook/evaluator.py')
-rw-r--r-- | scratch/facebook/evaluator.py | 234 |
1 files changed, 234 insertions, 0 deletions
diff --git a/scratch/facebook/evaluator.py b/scratch/facebook/evaluator.py new file mode 100644 index 000000000000..14deb66a8f65 --- /dev/null +++ b/scratch/facebook/evaluator.py @@ -0,0 +1,234 @@ +# After stumbling through my first technical screen, I'm going to drill +# algorithms for implementing evaluators for a toy expression language: +# e.g. 2 + 13 * 3 + 5 * 2 +# +# As of now, I'm aware of a few algorithms for solving this: +# - DONE: Convert infix expression to Polish notation and evaluate the Polish +# notation. +# - DONE: Evaluate the tokens using two stacks and avoid converting it. +# - DONE: Create a tree of depth two to encode the operator precedence and +# evaluate that AST. +# - TODO: Convert the infix expression to a prefix expression +# - TODO: Write a recursive descent parser and evaluate the AST. + +operators = { + '*': 1, + '+': 0, +} + +def tokenize(xs): + result = [] + i = 0 + while i < len(xs): + current = xs[i] + if current == ' ': + i += 1 + continue + elif current in operators.keys(): + result.append(current) + i += 1 + else: + i += 1 + while i < len(xs) and xs[i] in {str(n) for n in range(10)}: + current += xs[i] + i += 1 + result.append(int(current)) + return result + +# Convert infix to postfix; evaluate postfix +# I believe this is known as the Shunting-Yards algorithm +def postfix(tokens): + result = [] + s = [] + for token in tokens: + if type(token) == int: + result.append(token) + else: + while s and operators[token] < operators[s[-1]]: + result.append(s.pop()) + s.append(token) + while s: + result.append(s.pop()) + return result + +def do_evaluate_with_polish_notation(tokens): + s = [] + for token in tokens: + if token == '*': + s.append(s.pop() * s.pop()) + elif token == '+': + s.append(s.pop() + s.pop()) + else: + s.append(token) + return s[-1] + +def evaluate_with_polish_notation(expr): + tokens = tokenize(expr) + print("Tokens: {}".format(tokens)) + pn = postfix(tokens) + print("Postfix: {}".format(pn)) + result = do_evaluate_with_polish_notation(pn) + print("Result: {}".format(result)) + return result + +# Evaluate Tokens + +def apply_operator(op, a, b): + if op == '*': + return a * b + elif op == '+': + return a + b + +def do_evaluate_tokens(tokens): + vals = [] + ops = [] + for token in tokens: + if type(token) == int: + vals.append(token) + elif token == '*': + ops.append(token) + elif token == '+': + while ops and operators[token] < operators[ops[-1]]: + vals.append(apply_operator(ops.pop(), vals.pop(), vals.pop())) + ops.append(token) + else: + raise Exception("Unexpected token: {}".format(token)) + while ops: + vals.append(apply_operator(ops.pop(), vals.pop(), vals.pop())) + return vals[-1] + +def evaluate_tokens(expr): + tokens = tokenize(expr) + print("Tokens: {}".format(tokens)) + result = do_evaluate_tokens(tokens) + print("Result: {}".format(result)) + return result + +# Ad Hoc Tree + +def parse(tokens): + result = [] + series = [] + for token in tokens: + if type(token) == int: + series.append(token) + elif token == '*': + continue + elif token == '+': + result.append(series) + series = [] + else: + raise Exception("Unexpected token: {}".format(token)) + result.append(series) + return result + +def product(xs): + result = 1 + for x in xs: + result *= x + return result + +def do_evaluate_ad_hoc_tree(ast): + return sum([product(xs) for xs in ast]) + +def evaluate_ad_hoc_tree(expr): + tokens = tokenize(expr) + print("Tokens: {}".format(tokens)) + ast = parse(tokens) + print("AST: {}".format(ast)) + result = do_evaluate_ad_hoc_tree(ast) + print("Result: {}".format(result)) + return result + +# Recursive Descent Parser + +# expression -> addition ; +# addition -> multiplication ( "+" multiplication )* ; +# multiplication -> terminal ( "*" terminal )* ; +# terminal -> NUMBER ; + +class Parser(object): + def __init__(self, tokens): + self.tokens = tokens + self.i = 0 + + # mutations + def advance(self): + self.i += 1 + + def consume(self): + result = self.curr() + self.advance() + return result + + # predicates + def match(self, x): + if self.curr() == x: + self.advance() + return True + return False + + def tokens_available(self): + return self.i < len(self.tokens) + + # getters + def prev(self): + return self.tokens[self.i - 1] + + def curr(self): + return self.tokens[self.i] if self.tokens_available() else None + + def next(self): + return self.tokens[self.i + 1] + +def parse_expression(tokens): + parser = Parser(tokens) + return parse_addition(parser) + +def parse_addition(parser): + result = parse_multiplication(parser) + while parser.match("+"): + op = parser.prev() + rhs = parse_multiplication(parser) + result = ["+", result, rhs] + return result + +def parse_multiplication(parser): + result = parse_terminal(parser) + while parser.match("*"): + op = parser.prev() + rhs = parse_terminal(parser) + result = ["*", result, rhs] + return result + +def parse_terminal(parser): + # If we reach here, the current token *must* be a number. + return parser.consume() + +def evaluate_ast(ast): + if type(ast) == int: + return ast + else: + op, lhs, rhs = ast[0], ast[1], ast[2] + return apply_operator(op, evaluate_ast(lhs), evaluate_ast(rhs)) + +def evaluate_recursive_descent(expr): + tokens = tokenize(expr) + print("Tokens: {}".format(tokens)) + ast = parse_expression(tokens) + print("AST: {}".format(ast)) + result = evaluate_ast(ast) + return result + +methods = { + 'Polish Notation': evaluate_with_polish_notation, + 'Evaluate Tokens': evaluate_tokens, + 'Ad Hoc Tree': evaluate_ad_hoc_tree, + 'Recursive Descent': evaluate_recursive_descent, +} + +for name, fn in methods.items(): + expr = "13 + 2 * 4 + 7 + 3 * 8" + print("Evaluating \"{}\" using the \"{}\" method...".format(expr, name)) + assert fn(expr) == eval(expr) + print("Success!") |