diff options
Diffstat (limited to 'users/wpcarro/scratch/simple-select/main.py')
-rw-r--r-- | users/wpcarro/scratch/simple-select/main.py | 83 |
1 file changed, 77 insertions, 6 deletions
diff --git a/users/wpcarro/scratch/simple-select/main.py b/users/wpcarro/scratch/simple-select/main.py index 0aea8dcffc5e..3bcf85e5efdf 100644 --- a/users/wpcarro/scratch/simple-select/main.py +++ b/users/wpcarro/scratch/simple-select/main.py @@ -1,5 +1,7 @@ import string from scanner import Scanner +from parser import Parser + ################################################################################ # Predicates ################################################################################ @@ -20,6 +22,13 @@ def is_whitespace(c): # Tokenizer ################################################################################ +AND = ("CONJUNCTION", "AND") +OR = ("CONJUNCTION", "OR") +NOT = ("PUNCTUATION", "NOT") +COLON = ("PUNCTUATION", "COLON") +LPAREN = ("PUNCTUATION", "LPAREN") +RPAREN = ("PUNCTUATION", "RPAREN") + def tokenize(x): s = Scanner(x) tokens = scan_tokens(s) @@ -36,8 +45,10 @@ def scan_tokens(s): def scan_token(s): punctuation = { - "-": "NOT", - ":": "COLON", + "-": NOT, + ":": COLON, + "(": LPAREN, + ")": RPAREN, } c = s.peek() if c in punctuation: @@ -71,26 +82,86 @@ def tokenize_regex(s): return ("REGEX", current) def tokenize_identifier(s): - keywords = { + conjunctions = { "AND", "OR", } current = s.advance() while is_alphanumeric(s.peek()): current += s.advance() - if current.upper() in keywords: - return ("KEYWORD", current.upper()) + if current.upper() in conjunctions: + return ("CONJUNCTION", current.upper()) else: return ("IDENTIFIER", current) ################################################################################ +# Parser +################################################################################ + +# EBNF +# Note: we order expression types by ascending levels of precedence. +# +# expression -> conjunction ; +# conjunction -> selection ( ( "AND" | "OR" )? selection )* ; +# selection -> "-"? IDENTIFIER ":" ( REGEX | STRING ) | grouping ; +# grouping -> REGEX | STRING | "(" expression ")" ; + +def parse(x): + tokens = tokenize(x) + p = Parser(tokens) + return expression(p) + +def expression(p): + return conjunction(p) + +def conjunction(p): + lhs = selection(p) + + while not p.exhausted() and p.test(lambda tokens, i: tokens[i] not in {LPAREN, RPAREN}): + conj = p.advance() if p.peek()[0] == "CONJUNCTION" else AND + rhs = selection(p) + lhs = ("CONJUNCTION", conj[1], lhs, rhs) + + if not p.exhausted(): + raise Exception("Encountered more tokens than we can parse: \"{}\"".format(p.tokens[p.i:])) + + return lhs + +def selection(p): + negate = False + if p.peek() == NOT: + negate = True + p.advance() + + if p.peek()[0] != "IDENTIFIER": + return grouping(p) + + ident = p.expect(lambda x: x[0] == "IDENTIFIER") + colon = p.expect(lambda x: x[1] == "COLON") + value = p.expect(lambda x: x[0] in {"REGEX", "STRING"}) + return ("SELECTION", negate, ident[1], value) + +def grouping(p): + if p.peek()[0] == "REGEX": + return p.advance() + + if p.peek()[0] == "STRING": + return p.advance() + + if p.peek() == LPAREN: + p.advance() + expr = expression(p) + p.expect(lambda x: x == RPAREN) + return ("GROUPING", expr) + +################################################################################ # Main ################################################################################ def main(): while True: x = input("> ") - print(tokenize(x)) + print(parse(x)) if __name__ == "__main__": main() |