diff options
author | William Carroll <wpcarro@gmail.com> | 2022-03-01T21·55-0800 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2022-03-01T21·58+0000 |
commit | 9eefa2e4842d5678f7829c7af45e38d30be617d8 (patch) | |
tree | 266bb456c41d496621d93d9ec7effa50d5940831 /users/wpcarro/scratch | |
parent | 69956d9a0bb32edb4cf1775642a06fe66fa20f8b (diff) |
feat(wpcarro/simple-select): Compile AST to SQL r/3886
Currently supports the SQLite flavor of SQL. Preliminary testing seems to show encouraging results.

Change-Id: Ib2ed6a695352f41185c8e8abdadfd76ce38bdbcc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/5344
Reviewed-by: wpcarro <wpcarro@gmail.com>
Autosubmit: wpcarro <wpcarro@gmail.com>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/wpcarro/scratch')
-rw-r--r-- | users/wpcarro/scratch/simple-select/main.py | 69 |
1 files changed, 62 insertions, 7 deletions
```diff
diff --git a/users/wpcarro/scratch/simple-select/main.py b/users/wpcarro/scratch/simple-select/main.py
index 3bcf85e5efdf..eb674b79134e 100644
--- a/users/wpcarro/scratch/simple-select/main.py
+++ b/users/wpcarro/scratch/simple-select/main.py
@@ -117,14 +117,12 @@ def expression(p):
 
 def conjunction(p):
     lhs = selection(p)
-    while not p.exhausted() and p.test(lambda tokens, i: tokens[i] not in {LPAREN, RPAREN}):
-        conj = p.advance() if p.peek()[0] == "CONJUNCTION" else AND
+    # TODO(wpcarro): Support default AND conjuctions when they're undefined.
+    while not p.exhausted() and p.match({AND, OR}):
+        conj = p.peek(n=-1)
         rhs = selection(p)
         lhs = ("CONJUNCTION", conj[1], lhs, rhs)
 
-    if not p.exhausted():
-        raise Exception("Encountered more tokens than we can parse: \"{}\"".format(p.tokens[p.i:]))
-
     return lhs
 
 def selection(p):
@@ -155,13 +153,70 @@ def grouping(p):
     return ("GROUPING", expr)
 
 ################################################################################
+# Compiler
+################################################################################
+
+def compile(source, table, columns):
+    ast = parse(source)
+    return "SELECT * FROM {} WHERE {};".format(table, do_compile(ast, columns))
+
+def do_compile(ast, columns):
+    if ast[0] == "REGEX":
+        cols = "({})".format(" || ".join(columns))
+        return "{} REGEXP '.*{}.*'".format(cols, ast[1])
+
+    if ast[0] == "STRING":
+        cols = "({})".format(" || ".join(columns))
+        return "{} LIKE '%{}%'".format(cols, ast[1])
+
+    if ast[0] == "SELECTION":
+        return compile_selection(ast)
+
+    if ast[0] == "CONJUNCTION":
+        _, conj, lhs, rhs = ast
+        lhs = do_compile(lhs, columns)
+        rhs = do_compile(rhs, columns)
+        return "{} {} {}".format(lhs, conj, rhs)
+
+    if ast[0] == "GROUPING":
+        return "({})".format(do_compile(ast[1], columns))
+
+    raise Exception("Unexpected AST: \"{}\"".format(ast))
+
+def compile_selection(ast):
+    _, negate, column, query = ast
+    match = compile_query(negate, query)
+    return "{} {}".format(column, match)
+
+def compile_query(negate, query):
+    query_type, query_string = query
+    if query_type == "REGEX":
+        if negate:
+            return "NOT REGEXP '.*{}.*'".format(query_string)
+        return "REGEXP '.*{}.*'".format(query_string)
+
+    if query_type == "STRING":
+        if negate:
+            return "NOT LIKE '%{}%'".format(query_string)
+        return "LIKE '%{}%'".format(query_string)
+
+################################################################################
 # Main
 ################################################################################
 
 def main():
     while True:
         x = input("> ")
-        print(parse(x))
-
+        print("tokens:\t{}".format(tokenize(x)))
+        print("AST:\t{}".format(parse(x)))
+        # TODO(wpcarro): Read columns from CSV.
+        print("query:\t\"{}\"".format(compile(x, "Movies", [
+            "year",
+            "rating",
+            "haveWatched",
+            "director",
+            "isCartoon",
+            "requiresSubtitles",
+        ])))
 if __name__ == "__main__":
     main()
```