From 9eefa2e4842d5678f7829c7af45e38d30be617d8 Mon Sep 17 00:00:00 2001 From: William Carroll Date: Tue, 1 Mar 2022 13:55:13 -0800 Subject: feat(wpcarro/simple-select): Compile AST to SQL Currently supports the SQLite flavor of SQL. Preliminary testing seems to show encouraging results. Change-Id: Ib2ed6a695352f41185c8e8abdadfd76ce38bdbcc Reviewed-on: https://cl.tvl.fyi/c/depot/+/5344 Reviewed-by: wpcarro Autosubmit: wpcarro Tested-by: BuildkiteCI --- users/wpcarro/scratch/simple-select/main.py | 69 ++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 7 deletions(-) (limited to 'users/wpcarro/scratch') diff --git a/users/wpcarro/scratch/simple-select/main.py b/users/wpcarro/scratch/simple-select/main.py index 3bcf85e5efdf..eb674b79134e 100644 --- a/users/wpcarro/scratch/simple-select/main.py +++ b/users/wpcarro/scratch/simple-select/main.py @@ -117,14 +117,12 @@ def expression(p): def conjunction(p): lhs = selection(p) - while not p.exhausted() and p.test(lambda tokens, i: tokens[i] not in {LPAREN, RPAREN}): - conj = p.advance() if p.peek()[0] == "CONJUNCTION" else AND + # TODO(wpcarro): Support default AND conjunctions when they're undefined. 
+ while not p.exhausted() and p.match({AND, OR}): + conj = p.peek(n=-1) rhs = selection(p) lhs = ("CONJUNCTION", conj[1], lhs, rhs) - if not p.exhausted(): - raise Exception("Encountered more tokens than we can parse: \"{}\"".format(p.tokens[p.i:])) - return lhs def selection(p): @@ -154,6 +152,54 @@ def grouping(p): p.expect(lambda x: x == RPAREN) return ("GROUPING", expr) +################################################################################ +# Compiler +################################################################################ + +def compile(source, table, columns): + ast = parse(source) + return "SELECT * FROM {} WHERE {};".format(table, do_compile(ast, columns)) + +def do_compile(ast, columns): + if ast[0] == "REGEX": + cols = "({})".format(" || ".join(columns)) + return "{} REGEXP '.*{}.*'".format(cols, ast[1]) + + if ast[0] == "STRING": + cols = "({})".format(" || ".join(columns)) + return "{} LIKE '%{}%'".format(cols, ast[1]) + + if ast[0] == "SELECTION": + return compile_selection(ast) + + if ast[0] == "CONJUNCTION": + _, conj, lhs, rhs = ast + lhs = do_compile(lhs, columns) + rhs = do_compile(rhs, columns) + return "{} {} {}".format(lhs, conj, rhs) + + if ast[0] == "GROUPING": + return "({})".format(do_compile(ast[1], columns)) + + raise Exception("Unexpected AST: \"{}\"".format(ast)) + +def compile_selection(ast): + _, negate, column, query = ast + match = compile_query(negate, query) + return "{} {}".format(column, match) + +def compile_query(negate, query): + query_type, query_string = query + if query_type == "REGEX": + if negate: + return "NOT REGEXP '.*{}.*'".format(query_string) + return "REGEXP '.*{}.*'".format(query_string) + + if query_type == "STRING": + if negate: + return "NOT LIKE '%{}%'".format(query_string) + return "LIKE '%{}%'".format(query_string) + ################################################################################ # Main ################################################################################ @@ 
-161,7 +207,16 @@ def grouping(p): def main(): while True: x = input("> ") - print(parse(x)) - + print("tokens:\t{}".format(tokenize(x))) + print("AST:\t{}".format(parse(x))) + # TODO(wpcarro): Read columns from CSV. + print("query:\t\"{}\"".format(compile(x, "Movies", [ + "year", + "rating", + "haveWatched", + "director", + "isCartoon", + "requiresSubtitles", + ]))) if __name__ == "__main__": main() -- cgit 1.4.1