diff options
Diffstat (limited to 'users/wpcarro/scratch')
31 files changed, 1760 insertions, 4 deletions
diff --git a/users/wpcarro/scratch/blockchain/default.nix b/users/wpcarro/scratch/blockchain/default.nix index 745e7a5ab490..c02f9a9c8108 100644 --- a/users/wpcarro/scratch/blockchain/default.nix +++ b/users/wpcarro/scratch/blockchain/default.nix @@ -2,7 +2,8 @@ let pypkgs = pkgs.python3Packages; -in pkgs.python3Packages.buildPythonApplication { +in +pkgs.python3Packages.buildPythonApplication { pname = "main"; src = ./.; version = "0.0.1"; diff --git a/users/wpcarro/scratch/compiler/.envrc b/users/wpcarro/scratch/compiler/.envrc new file mode 100644 index 000000000000..ff7eea1f7a05 --- /dev/null +++ b/users/wpcarro/scratch/compiler/.envrc @@ -0,0 +1,3 @@ +source_up + +use_nix diff --git a/users/wpcarro/scratch/compiler/.gitignore b/users/wpcarro/scratch/compiler/.gitignore new file mode 100644 index 000000000000..96261d3fc7e9 --- /dev/null +++ b/users/wpcarro/scratch/compiler/.gitignore @@ -0,0 +1,5 @@ +a.out +*.cmi +*.cmo +*.cmx +*.o \ No newline at end of file diff --git a/users/wpcarro/scratch/compiler/debug.ml b/users/wpcarro/scratch/compiler/debug.ml new file mode 100644 index 000000000000..e39ff13742be --- /dev/null +++ b/users/wpcarro/scratch/compiler/debug.ml @@ -0,0 +1,66 @@ +open Types + +(* Print x prefixed with tag and return x unchanged. *) +let print (f : 'a -> string) (tag : string) (x : 'a) : 'a = + Printf.printf "%s: %s\n" tag (f x); + x + +let rec ast (tree : Types.value) : string = + match tree with + | ValueLiteral (LiteralBool x) -> + Printf.sprintf "ValueLiteral (LiteralBool %s)" (string_of_bool x) + | ValueLiteral (LiteralInt x) -> + Printf.sprintf "ValueLiteral (LiteralInt %s)" (string_of_int x) + | ValueVariable x -> + Printf.sprintf "ValueVariable %s" x + | ValueFunction (x, body) -> + Printf.sprintf "ValueFunction (%s, %s)" x (ast body) + | ValueApplication (f, x) -> + Printf.sprintf "ValueApplication (%s, %s)" (ast f) (ast x) + | ValueVarApplication (f, x) -> + Printf.sprintf "ValueVarApplication (%s, %s)" f (ast x) + | ValueBinder (k, v, x) -> + Printf.sprintf "ValueBinder (%s, %s, %s)" k (ast v) (ast x) + +let rec value (x : value) : string = + match x with + | ValueLiteral (LiteralInt x) -> + Printf.sprintf "Int %d" x + | ValueLiteral (LiteralBool x) -> + Printf.sprintf "Bool %b" x + | ValueVariable x -> + Printf.sprintf "Var %s" x + | ValueFunction (name, x) -> + Printf.sprintf "Fn %s %s" name (value x) + | ValueApplication (f, x) -> + Printf.sprintf "App %s %s" (value f) (value x) + | ValueVarApplication (name, x) -> + Printf.sprintf "App %s %s" name (value x) + | ValueBinder (name, x, body) -> + Printf.sprintf "Bind %s %s %s" name (value x) (value body) + +let rec type' (t : _type) : string = + match t with + | TypeInt -> "Integer" + | TypeBool -> "Boolean" + | TypeVariable k -> Printf.sprintf "%s" k + | TypeArrow (a, b) -> Printf.sprintf "%s -> %s" (type' a) (type' b) + +let quantified_type (q : quantified_type) : string = + let QuantifiedType (vars, t) = q in + if List.length vars == 0 then + Printf.sprintf "%s" (type' t) + else + Printf.sprintf "forall %s. %s" (String.concat "," vars) (type' t) + +let substitution (s : substitution) : string = + FromString.fold (fun k v acc -> Printf.sprintf "%s\"%s\" |-> %s;" acc k (type' v)) s "" + |> Printf.sprintf "{ %s }" + +let env (s : env) : string = + FromString.fold (fun k v acc -> Printf.sprintf "%s\"%s\" |-> %s;" acc k (quantified_type v)) s "" + |> Printf.sprintf "{ %s }" + +let inference (Inference (s, t)) = + Printf.sprintf "type: %s; sub: %s" (type' t) (substitution s) + diff --git a/users/wpcarro/scratch/compiler/expr_parser.ml b/users/wpcarro/scratch/compiler/expr_parser.ml new file mode 100644 index 000000000000..797592931a2c --- /dev/null +++ b/users/wpcarro/scratch/compiler/expr_parser.ml @@ -0,0 +1,187 @@ +(******************************************************************************* + * CLI REPL for an s-expression Lambda Calculus. + * + * Lambda Calculus Expression Language: + * + * Helpers: + * symbol -> [-a-z]+ + * string -> '"' [^"]* '"' + * boolean -> 'true' | 'false' + * integer -> [1-9][0-9]* + * + * Core: + * expression -> funcdef + * binding -> '(' 'let' symbol expr expr ')' + * funcdef -> '(' 'fn' symbol expr ')' + * funccall -> '(' ( symbol | funcdef) expr ')' + * literal -> string | boolean | integer + * variable -> symbol + * + * Example Usage: + * $ ocamlopt types.ml str.cmxa inference.ml parser.ml expr_parser.ml && ./a.out + * repl> true + * tokens: [ "true" ] + * ast: ValueLiteral (LiteralBool true) + * Boolean + * repl> + * + ******************************************************************************) + +open Parser +open Inference +open Debug +open Prettify +open Vec + +type literal = LiteralBool of bool | LiteralInt of int + +let ( let* ) = Option.bind +let map = Option.map + +let tokenize (x : string) : token vec = + let xs = Vec.create () in + let i = ref 0 in + while !i < String.length x do + match x.[!i] with + | ' ' -> i := !i + 1 + (* strings *) + | '"' -> + let curr = ref "\"" in + i := !i + 1; + while x.[!i] != '"' do + curr := !curr ^ "?"; + i := !i + 1 + done; + curr := !curr ^ "\""; + Vec.append !curr xs; + i := !i + 1 + | '(' -> + Vec.append "(" xs; + i := !i + 1 + | ')' -> + Vec.append ")" xs; + i := !i + 1 + | _ -> + let token = ref "" in + while !i < String.length x && not (String.contains "() " x.[!i]) do + token := !token ^ String.make 1 x.[!i]; + i := !i + 1 + done; + Vec.append !token xs + done; + xs + +let parse_symbol (p : parser) : string option = + let* x = p#curr in + if Str.string_match (Str.regexp "[-a-z][0-9]*") x 0 then + begin + p#advance; + Some x + end + else + None + +let parse_variable (p : parser) : Types.value option = + let* x = parse_symbol p in + Some (Types.ValueVariable x) + +let parse_literal (p : parser) : Types.value option = + match p#curr with + | Some "true" -> + p#advance; + Some (ValueLiteral (LiteralBool true)) + | Some "false" -> + p#advance; + Some (ValueLiteral (LiteralBool false)) + | Some x -> + (match int_of_string_opt x with + | Some n -> + p#advance; + Some (ValueLiteral (LiteralInt n)) + | _ -> + if String.starts_with ~prefix:"\"" x then + begin + p#advance; + Some (ValueLiteral (LiteralString x)) + end + else + parse_variable p) + | _ -> None + +let rec parse_expression (p : parser) : Types.value option = + parse_binding p + +and parse_funccall (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "(") -> + p#advance; + let* f = parse_funcdef p in + let* x = parse_expression p in + p#expect ")"; + Some (Types.ValueApplication (f, x)) + | (Some "(", _) -> + p#advance; + let* f = parse_symbol p in + let* x = parse_expression p in + p#expect ")"; + Some (Types.ValueVarApplication (f, x)) + | _ -> parse_literal p + +and parse_funcdef (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "fn") -> + p#advance; + p#advance; + let* name = parse_symbol p in + let* body = parse_expression p in + p#expect ")"; + Some (Types.ValueFunction (name, body)) + | _ -> parse_funccall p + +and parse_binding (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "let") -> + p#advance; + p#advance; + let* name = parse_symbol p in + let* value = parse_expression p in + let* body = parse_expression p in + Some (Types.ValueBinder (name, value, body)) + | _ -> parse_funcdef p + +let print_tokens (xs : string vec) : unit = + xs + |> Vec.map (Printf.sprintf "\"%s\"") + |> Vec.join ", " + |> Printf.sprintf "tokens: [ %s ]" + |> print_string + |> print_newline + +let parse_language (x : string) : Types.value option = + let tokens = tokenize x in + print_tokens tokens; + parse_expression (new parser tokens) + +let main = + while true do + begin + print_string "repl> "; + let x = read_line () in + match parse_language x with + | Some ast -> + (match ast |> Debug.print Debug.ast "ast" |> do_infer with + | None -> + "Type-check failed" + |> print_string + |> print_newline + | Some x -> + x + |> Prettify.type' + |> print_string + |> print_newline) + | None -> + "Could not parse" + |> print_string + |> print_newline + end + done diff --git a/users/wpcarro/scratch/compiler/inference.ml b/users/wpcarro/scratch/compiler/inference.ml new file mode 100644 index 000000000000..e00904a09eed --- /dev/null +++ b/users/wpcarro/scratch/compiler/inference.ml @@ -0,0 +1,183 @@ +(******************************************************************************* + * WIP implementation of the Hindley-Milner type system primarily for learning + * purposes. + * + * Wish List: + * - TODO Debug this inference (let f (fn x x) f) + ******************************************************************************) + +open Types +open Debug + +(******************************************************************************* + * Library + ******************************************************************************) + +let ( let* ) = Option.bind + +let set_from_list (xs : string list) : set = + xs |> List.fold_left (fun acc x -> FromString.add x true acc) FromString.empty + +(* Map union that favors the rhs values (i.e. "last writer wins"). *) +let lww (xs : 'a FromString.t) (ys : 'a FromString.t) : 'a FromString.t = + FromString.union (fun k x y -> Some y) xs ys + +let emptyEnv : env = FromString.empty + +let rec free_type_vars (t : _type) : set = + match t with + | TypeVariable k -> FromString.singleton k true + | TypeInt -> FromString.empty + | TypeBool -> FromString.empty + | TypeString -> FromString.empty + | TypeArrow (a, b) -> lww (free_type_vars a) (free_type_vars b) + +let i : int ref = ref 0 + +let make_type_var () : _type = + let res = Printf.sprintf "a%d" !i in + i := !i + 1; + TypeVariable res + +exception OccursCheck + +let bind_var (k : string) (t : _type) : substitution = + if t == TypeVariable k then FromString.empty + else if FromString.exists (fun name _ -> name == k) (free_type_vars t) then + raise OccursCheck + else FromString.singleton k t + +let rec instantiate (q : quantified_type) : _type = + let (QuantifiedType (names, t)) = q in + match t with + | TypeInt -> TypeInt + | TypeBool -> TypeBool + | TypeString -> TypeString + | TypeVariable k -> + if List.exists (( == ) k) names then make_type_var () else TypeVariable k + | TypeArrow (a, b) -> + TypeArrow + (instantiate (QuantifiedType (names, a)), instantiate (QuantifiedType (names, b))) + +let quantified_type_ftvs (q : quantified_type) : set = + let (QuantifiedType (names, t)) = q in + lww (free_type_vars t) (names |> set_from_list) + +let generalize (env : env) (t : _type) : quantified_type = + let envftv = + env |> FromString.bindings + |> List.map (fun (_, v) -> quantified_type_ftvs v) + |> List.fold_left lww FromString.empty + in + let names = + lww (free_type_vars t) envftv + |> FromString.bindings + |> List.map (fun (k, _) -> k) + in + QuantifiedType (names, t) + +let rec substitute_type (s : substitution) (t : _type) : _type = + match t with + | TypeVariable k as tvar -> + (match FromString.find_opt k s with + | Some v -> substitute_type s v + | None -> tvar) + | TypeArrow (a, b) -> TypeArrow (substitute_type s a, substitute_type s b) + | TypeInt -> TypeInt + | TypeBool -> TypeBool + | TypeString -> TypeString + +let substitute_quantified_type (s : substitution) (q : quantified_type) : quantified_type = + let (QuantifiedType (names, t)) = q in + let s1 = + FromString.filter (fun k v -> List.exists (fun x -> k != x) names) s + in + QuantifiedType (names, substitute_type s1 t) + +let substitute_env (s : substitution) (env : env) : env = + FromString.map (fun q -> substitute_quantified_type s q) env + +let compose_substitutions (xs : substitution list) : substitution = + let do_compose_substitutions s1 s2 = lww s2 (FromString.map (substitute_type s2) s1) in + List.fold_left do_compose_substitutions FromString.empty xs + +let rec unify (a : _type) (b : _type) : substitution option = + match (a, b) with + | TypeInt, TypeInt -> Some FromString.empty + | TypeBool, TypeBool -> Some FromString.empty + | TypeString, TypeString -> Some FromString.empty + | TypeVariable k, _ -> Some (bind_var k b) + | _, TypeVariable k -> Some (bind_var k a) + | TypeArrow (a, b), TypeArrow (c, d) -> + let* s1 = unify a c in + let* s2 = unify (substitute_type s1 b) (substitute_type s1 d) in + let s3 = compose_substitutions [s1; s2] in + s1 |> Debug.substitution |> Printf.sprintf "s1: %s\n" |> print_string; + s2 |> Debug.substitution |> Printf.sprintf "s2: %s\n" |> print_string; + s3 |> Debug.substitution |> Printf.sprintf "s3: %s\n" |> print_string; + Some s3 + | _ -> None + +let print_env (env : env) = + Printf.sprintf "env: %s\n" (Debug.env env) + |> print_string + +let print_val (x : value) = + Printf.sprintf "val: %s\n" (Debug.value x) + |> print_string + +let print_inference (x : inference option) = + match x with + | None -> "no inference\n" |> print_string + | Some x -> + Printf.sprintf "inf: %s\n" (Debug.inference x) + |> print_string + +let rec infer (env : env) (x : value) : inference option = + print_env env; + print_val x; + let res = match x with + | ValueLiteral lit -> ( + match lit with + | LiteralInt _ -> Some (Inference (FromString.empty, TypeInt)) + | LiteralBool _ -> Some (Inference (FromString.empty, TypeBool)) + | LiteralString _ -> Some (Inference (FromString.empty, TypeString))) + | ValueVariable k -> + let* v = FromString.find_opt k env in + Some (Inference (FromString.empty, instantiate v)) + | ValueFunction (param, body) -> + let typevar = make_type_var () in + let env1 = FromString.remove param env in + let env2 = lww (FromString.singleton param (QuantifiedType ([], typevar))) env1 in + let* (Inference (s1, t1)) = infer env2 body in + Some (Inference (s1, TypeArrow (substitute_type s1 typevar, t1))) + | ValueApplication (f, x) -> + let result = make_type_var () in + let* (Inference (s1, t1)) = infer env f in + let* (Inference (s2, t2)) = infer (substitute_env s1 env) x in + let* s3 = unify (substitute_type s2 t1) (TypeArrow (t2, result)) in + Some (Inference + ( compose_substitutions [s3; s2; s1], + substitute_type s3 result )) + | ValueVarApplication (name, x) -> + let* v = FromString.find_opt name env in + let t1 = instantiate v in + let typevar = make_type_var () in + let* (Inference (s2, t2)) = infer env x in + let* s3 = unify (substitute_type s2 t1) (TypeArrow (t2, typevar)) in + Some (Inference + ( compose_substitutions [s2; s3], + substitute_type s3 typevar )) + | ValueBinder (k, v, body) -> + let* (Inference (s1, t1)) = infer env v in + let env1 = FromString.remove k env in + let tg = generalize (substitute_env s1 env) t1 in + let env2 = FromString.add k tg env1 in + let* (Inference (s2, t2)) = infer (substitute_env s1 env2) body in + Some (Inference (compose_substitutions [s1; s2], t2)) in + print_inference res; + res + +let do_infer (x : value) : _type option = + let* Inference (_, t) = infer FromString.empty x in + Some t diff --git a/users/wpcarro/scratch/compiler/parser.ml b/users/wpcarro/scratch/compiler/parser.ml new file mode 100644 index 000000000000..dc66f2506ed3 --- /dev/null +++ b/users/wpcarro/scratch/compiler/parser.ml @@ -0,0 +1,47 @@ +(****************************************************************************** + * Defines a generic parser class. + ******************************************************************************) + +open Vec + +exception ParseError of string + +type token = string +type state = { i : int; tokens : token vec } + +class parser (tokens : token vec) = + object (self) + val mutable tokens = tokens + val mutable i = ref 0 + + method advance = i := !i + 1 + method prev : token option = Vec.get (!i - 1) tokens + method curr : token option = Vec.get !i tokens + method next : token option = Vec.get (!i + 1) tokens + + method consume : token option = + match self#curr with + | None -> None + | Some x as res -> + self#advance; + res + + method expect (x : token) = + match self#curr with + | Some y when x = y -> self#advance + | _ -> raise (ParseError (Printf.sprintf "Expected %s" x)) + + method matches (x : token) : bool = + match self#curr with + | None -> false + | Some y -> + if x = y then + begin + self#advance; + true + end + else false + + method exhausted : bool = !i >= Vec.length tokens + method state : state = { i = !i; tokens } + end diff --git a/users/wpcarro/scratch/compiler/prettify.ml b/users/wpcarro/scratch/compiler/prettify.ml new file mode 100644 index 000000000000..7903ad36947c --- /dev/null +++ b/users/wpcarro/scratch/compiler/prettify.ml @@ -0,0 +1,9 @@ +open Types + +(* Pretty-print the type, t. *) +let rec type' (t : _type) : string = + match t with + | TypeInt -> "Integer" + | TypeBool -> "Boolean" + | TypeVariable k -> Printf.sprintf "%s" k + | TypeArrow (a, b) -> Printf.sprintf "%s -> %s" (type' a) (type' b) diff --git a/users/wpcarro/scratch/compiler/register_vm.ml b/users/wpcarro/scratch/compiler/register_vm.ml new file mode 100644 index 000000000000..0a573048e77e --- /dev/null +++ b/users/wpcarro/scratch/compiler/register_vm.ml @@ -0,0 +1,129 @@ +(* + Rewriting the Python implementation of the register VM in OCaml to see how + how much imperative/mutative programming OCaml allows. + + Note: Some of this code is intentionally not written in a functional style + because one of the goals was to see how similar this OCaml implementation + could be to the Python implementation. + + Conclusion: It's pretty easy to switch between the two languages. + + Usage: Recommended compilation settings I hastily found online: + $ ocamlopt -w +A-42-48 -warn-error +A-3-44 ./register_vm.ml && ./a.out + + Formatting: + $ ocamlformat --inplace --enable-outside-detected-project ./register_vm.ml + *) + +open Vec + +type reg = X | Y | Res +type binop = int -> int -> int + +type ast = + | Const of int + | Add of ast * ast + | Sub of ast * ast + | Mul of ast * ast + | Div of ast * ast + +type opcode0 = + | Op0AssignRegLit of reg * int + | Op0AssignRegReg of reg * reg + | Op0BinOp of binop * reg * reg * reg + | Op0PushReg of reg + | Op0PopAndSet of reg + | Op0Null + +type opcode1 = + | Op1AssignRegLit of int * int + | Op1AssignRegReg of int * int + | Op1BinOp of (int -> int -> int) * int * int * int + | Op1PushReg of int + | Op1PopAndSet of int + | Op1Null + +type opcodes0 = opcode0 vec +type opcodes1 = opcode1 vec + +let registers : int vec = Vec.make 8 0 +let stack : int Stack.t = Stack.create () +let reg_idx (r : reg) : int = match r with X -> 0 | Y -> 1 | Res -> 2 + +let reg_name (r : reg) : string = + match r with X -> "x" | Y -> "y" | Res -> "res" + +let print_opcodes0 (xs : opcodes0) : opcodes0 = + let print_opcode x = + match x with + | Op0AssignRegLit (r, x) -> Printf.printf "%s <- %d\n" (reg_name r) x + | Op0AssignRegReg (dst, src) -> + Printf.printf "%s <- $%s\n" (reg_name dst) (reg_name src) + | Op0PushReg src -> Printf.printf "push $%s\n" (reg_name src) + | Op0PopAndSet dst -> Printf.printf "%s <- pop\n" (reg_name dst) + | Op0BinOp (_, lhs, rhs, dst) -> + Printf.printf "%s <- $%s ? $%s\n" (reg_name dst) (reg_name lhs) + (reg_name rhs) + | Op0Null -> () + in + Vec.iter print_opcode xs; + xs + +let rec compile (ast : ast) : opcodes0 = + let result : opcodes0 = Vec.create () in + (match ast with + | Const x -> Vec.append (Op0AssignRegLit (Res, x)) result; + | Add (lhs, rhs) -> compile_bin_op ( + ) lhs rhs result + | Sub (lhs, rhs) -> compile_bin_op ( - ) lhs rhs result + | Mul (lhs, rhs) -> compile_bin_op ( * ) lhs rhs result + | Div (lhs, rhs) -> compile_bin_op ( / ) lhs rhs result); + result + +and compile_bin_op (f : binop) (lhs : ast) (rhs : ast) (result : opcodes0) = + lhs |> compile |> Vec.append_to result; + Vec.append (Op0PushReg Res) result; + rhs |> compile |> Vec.append_to result; + Vec.append (Op0PopAndSet X) result; + Vec.append (Op0AssignRegReg (Y, Res)) result; + Vec.append (Op0BinOp (f, X, Y, Res)) result + +let compile_registers (xs : opcodes0) : opcodes1 = + let do_compile x = + match x with + | Op0AssignRegLit (dst, x) -> Op1AssignRegLit (reg_idx dst, x) + | Op0AssignRegReg (dst, src) -> Op1AssignRegReg (reg_idx dst, reg_idx src) + | Op0PushReg src -> Op1PushReg (reg_idx src) + | Op0PopAndSet dst -> Op1PopAndSet (reg_idx dst) + | Op0BinOp (f, lhs, rhs, dst) -> Op1BinOp (f, reg_idx lhs, reg_idx rhs, reg_idx dst) + | Op0Null -> Op1Null + in + Vec.map do_compile xs + +let eval (xs : opcodes1) : int = + let ip = ref 0 in + while !ip < Vec.length xs do + match Vec.get_unsafe !ip xs with + | Op1AssignRegLit (dst, x) -> + Vec.set dst x registers; + ip := !ip + 1 + | Op1AssignRegReg (dst, src) -> + Vec.set dst (Vec.get_unsafe src registers) registers; + ip := !ip + 1 + | Op1PushReg src -> + Stack.push (Vec.get_unsafe src registers) stack; + ip := !ip + 1 + | Op1PopAndSet dst -> + Vec.set dst (Stack.pop stack) registers; + ip := !ip + 1 + | Op1BinOp (f, lhs, rhs, dst) -> + let lhs = Vec.get_unsafe lhs registers in + let rhs = Vec.get_unsafe rhs registers in + Vec.set dst (f lhs rhs) registers; + ip := !ip + 1 + | Op1Null -> ip := !ip + 1 + done; + Vec.get_unsafe (reg_idx Res) registers +;; + +Add (Mul (Const 2, Div (Const 100, Const 2)), Const 5) +|> compile |> print_opcodes0 |> compile_registers |> eval |> print_int diff --git a/users/wpcarro/scratch/compiler/register_vm.py b/users/wpcarro/scratch/compiler/register_vm.py new file mode 100644 index 000000000000..302bce5a0ecb --- /dev/null +++ b/users/wpcarro/scratch/compiler/register_vm.py @@ -0,0 +1,161 @@ +# Silly proof-of-concept register VM. + +def compile_binary_op(op, ast): + result = [] + for x in compile(ast[1]): + result.append(x) + result.append(PUSH_REG) + result.append(RES) + for x in compile(ast[2]): + result.append(x) + result.append(ASSIGN_REG_REG) + result.append(Y) + result.append(RES) + result.append(POP) + result.append(X) + result.append(op) + return result + +def compile(ast): + result = [] + + if ast[0] == 'CONST': + result.append(ASSIGN_REG_LIT) + result.append(RES) + result.append(ast[1]) + elif ast[0] == 'ADD': + result += compile_binary_op(ADD, ast) + elif ast[0] == 'SUB': + result += compile_binary_op(SUB, ast) + elif ast[0] == 'MUL': + result += compile_binary_op(MUL, ast) + elif ast[0] == 'DIV': + result += compile_binary_op(DIV, ast) + elif ast[0] == 'RETURN': + result.append(RETURN) + else: + raise Exception('Cannot compile unknown AST node: {}'.format(ast[0])) + + return result + +# opcodes +ASSIGN_REG_LIT = 0x0 +ASSIGN_REG_REG = 0x1 +ADD = 0x2 +SUB = 0x3 +MUL = 0x4 +DIV = 0x5 +SWAP = 0x6 +RETURN = 0x7 +PUSH_REG = 0x8 +POP = 0x9 + +# register indices +X = 0x0 +Y = 0x1 +RES = 0x2 + +registers = [0x0] * 8 +stack = [] + +def reg_name(i): + if i == X: return 'x' + if i == Y: return 'x' + if i == RES: return 'res' + +def print_instructions(xs): + i = 0 + + while i < len(xs): + if xs[i] == ASSIGN_REG_LIT: + # print('ASSIGN_REG_LIT {} {}'.format(reg_name(xs[i + 1]), xs[i + 2])) + print('{} <- {}'.format(reg_name(xs[i + 1]), xs[i + 2])) + i += 3 + elif xs[i] == ASSIGN_REG_REG: + # print('ASSIGN_REG_REG {} {}'.format(reg_name(xs[i + 1]), reg_name(xs[i + 2]))) + print('{} <- ${}'.format(reg_name(xs[i + 1]), reg_name(xs[i + 2]))) + i += 3 + elif xs[i] == ADD: + print('add') + i += 1 + elif xs[i] == SUB: + print('sub') + i += 1 + elif xs[i] == MUL: + print('mul') + i += 1 + elif xs[i] == DIV: + print('div') + i += 1 + elif xs[i] == PUSH_REG: + print('push ${}'.format(reg_name(xs[i + 1]))) + i += 2 + elif xs[i] == POP: + print('{} <- pop'.format(reg_name(xs[i + 1]))) + i += 2 + else: + raise Exception('Cannot print instruction: {}'.format(xs[i])) + +def eval(instructions): + print_instructions(instructions) + ip = 0 + cont = True + while ip < len(instructions): + if instructions[ip] == ASSIGN_REG_LIT: + r = instructions[ip + 1] + x = instructions[ip + 2] + registers[r] = x + ip += 3 + elif instructions[ip] == ASSIGN_REG_REG: + r_dst = instructions[ip + 1] + r_src = instructions[ip + 2] + registers[r_dst] = registers[r_src] + ip += 3 + elif instructions[ip] == ADD: + registers[RES] = registers[X] + registers[Y] + ip += 1 + elif instructions[ip] == MUL: + registers[RES] = registers[X] * registers[Y] + ip += 1 + elif instructions[ip] == SUB: + registers[RES] = registers[X] - registers[Y] + ip += 1 + elif instructions[ip] == MUL: + registers[RES] = registers[X] * registers[Y] + ip += 1 + elif instructions[ip] == DIV: + registers[RES] = registers[X] / registers[Y] + ip += 1 + elif instructions[ip] == SWAP: + r1 = instructions[ip + 1] + r2 = instructions[ip + 2] + registers[r1], registers[r2] = registers[r2], registers[r1] + ip += 3 + elif instructions[ip] == RETURN: + ip += 1 + cont = False + return registers[RES] + elif instructions[ip] == PUSH_REG: + src = instructions[ip + 1] + stack.append(registers[src]) + ip += 2 + elif instructions[ip] == POP: + dst = instructions[ip + 1] + registers[dst] = stack.pop() + ip += 2 + else: + raise Exception('Cannot eval instruction: {}'.format(instructions[ip])) + return registers[RES] + +def main(): + ast = ['ADD', + ['MUL', + ['MUL', ['CONST', 2], ['CONST', 3]], + ['DIV', ['CONST', 5], ['CONST', 5]]], + ['ADD', + ['SUB', ['CONST', 10], ['CONST', 1]], + ['MUL', ['CONST', 2], ['CONST', 2]]]] + + print('result: {}'.format(eval(compile(ast)))) + +main() diff --git a/users/wpcarro/scratch/compiler/shell.nix b/users/wpcarro/scratch/compiler/shell.nix new file mode 100644 index 000000000000..ec339eb91d98 --- /dev/null +++ b/users/wpcarro/scratch/compiler/shell.nix @@ -0,0 +1,9 @@ +{ pkgs, ... }: + +pkgs.mkShell { + buildInputs = with pkgs; [ + ocaml + ocamlPackages.utop + ocamlformat + ]; +} diff --git a/users/wpcarro/scratch/compiler/tests.ml b/users/wpcarro/scratch/compiler/tests.ml new file mode 100644 index 000000000000..828cbd16f090 --- /dev/null +++ b/users/wpcarro/scratch/compiler/tests.ml @@ -0,0 +1,43 @@ +open Expr_parser +open Type_parser +open Inference + +type test = { input : string; expect : string; } +(* type sub_test = { s1 : string; s2 : string; s3 : string } *) + +let ( let* ) = Option.bind + +let tests = [ + { input = "((fn x x) 10)"; expect = "Integer"; }; + { input = "(let f (fn x x) f)"; expect = "a -> a"; }; +] + +(* let sub_tests = [ *) +(* { *) +(* s1 = "{b |-> b -> Int}"; *) +(* s2 = "{a: Bool, b: Int, c: Bool}"; *) +(* s3 = "{a: Bool, b: Int -> Int, c: Bool}"; *) +(* } *) +(* ] *) + +exception FailedAssertion +exception TestError + +let main = + tests + |> List.iter (fun { input; expect } -> + Printf.sprintf ":t %s == %s\n" input expect |> print_string; + match (parse_language input, parse_input expect) with + | Some ast, Some expected -> + (match do_infer ast with + | Some actual -> + if actual != expected then + begin + print_type actual; + raise FailedAssertion + end + else + print_string "Test passed.\n" + | _ -> raise TestError) + | _ -> raise TestError); + print_string "All tests pass!" diff --git a/users/wpcarro/scratch/compiler/type_parser.ml b/users/wpcarro/scratch/compiler/type_parser.ml new file mode 100644 index 000000000000..99cc8bbc4f4e --- /dev/null +++ b/users/wpcarro/scratch/compiler/type_parser.ml @@ -0,0 +1,104 @@ +(****************************************************************************** + * Type Expression Language: + * + * Helpers: + * symbol -> [a-z] + * + * Core: + * type -> function + * function -> ( variable | literal ) '->' type + * literal -> 'Integer' | 'Boolean' + * variable -> symbol + ******************************************************************************) + +open Types +open Prettify +open Parser +open Inference +open Vec + +type side = LHS | RHS + +let ( let* ) = Option.bind + +let printsub (s : substitution) = + s |> Debug.substitution |> print_string |> print_newline + +let tokenize (x : string) : token vec = + let xs = Vec.create () in + let i = ref 0 in + while !i < String.length x do + match x.[!i] with + | ' ' -> i := !i + 1 + | _ -> + let beg = !i in + while (!i < String.length x) && (x.[!i] != ' ') do + i := !i + 1 + done; + Vec.append (String.sub x beg (!i - beg)) xs + done; + xs + +let rec parse_type (p : parser) : _type option = + parse_function p +and parse_function (p : parser) : _type option = + match p#next with + | Some "->" -> + let* a = parse_literal p in + p#advance; + let* b = parse_type p in + Some (TypeArrow (a, b)) + | _ -> parse_literal p +and parse_literal (p : parser) : _type option = + match p#curr with + | Some "Integer" | Some "Int" -> p#advance; Some TypeInt + | Some "Boolean" | Some "Bool" -> p#advance; Some TypeBool + | Some _ -> parse_variable p + | None -> None +and parse_variable (p : parser) : _type option = + match p#curr with + | Some x when String.length x = 1 -> p#advance; Some (TypeVariable x) + | _ -> None + +let print_tokens (xs : string vec) = + xs + |> Vec.map (Printf.sprintf "\"%s\"") + |> Vec.join ", " + |> Printf.sprintf "tokens: [ %s ]" + |> print_string + |> print_newline + +let print_type (t : _type) = + t |> Debug.type' |> Printf.sprintf "type: %s" |> print_string |> print_newline + +let parse_input (x : string) : _type option = + let tokens = tokenize x in + print_tokens tokens; + parse_type (new parser tokens) + +(* Continually prompt until user provides a parseable type expression *) +let rec read_type (arg : side) : _type = + let prompt = match arg with + | LHS -> "lhs> " + | RHS -> "rhs> " in + print_string prompt; + let x = read_line () in + match parse_input x with + | None -> + print_string "Failed to parse input.\n"; + read_type arg + | Some ast -> + print_type ast; + ast + +let main = + while true do + begin + let lhs = read_type LHS in + let rhs = read_type RHS in + match unify lhs rhs with + | None -> + Printf.printf "Cannot unify \"%s\" with \"%s\"\n" (Debug.type' lhs) (Debug.type' rhs) + | Some x -> printsub x + end + done diff --git a/users/wpcarro/scratch/compiler/types.ml b/users/wpcarro/scratch/compiler/types.ml new file mode 100644 index 000000000000..0acd05737cdc --- /dev/null +++ b/users/wpcarro/scratch/compiler/types.ml @@ -0,0 +1,31 @@ +type literal + = LiteralInt of int + | LiteralBool of bool + | LiteralString of string + +(* Lambda Calculus definition *) +type value = + | ValueLiteral of literal + | ValueVariable of string + | ValueFunction of string * value + | ValueApplication of value * value + | ValueVarApplication of string * value + | ValueBinder of string * value * value + +module FromString = Map.Make (String) + +type _type = + | TypeInt + | TypeBool + | TypeString + | TypeVariable of string + | TypeArrow of _type * _type + +type quantified_type = QuantifiedType of string list * _type + +type set = bool FromString.t +type substitution = _type FromString.t + +type env = quantified_type FromString.t + +type inference = Inference of substitution * _type diff --git a/users/wpcarro/scratch/compiler/vec.ml b/users/wpcarro/scratch/compiler/vec.ml new file mode 100644 index 000000000000..549078c5d87a --- /dev/null +++ b/users/wpcarro/scratch/compiler/vec.ml @@ -0,0 +1,127 @@ +(****************************************************************************** + * Similar to Python's list + * + * - mutable + * - dynamically resized + * - O(1) read + * - O(1) write + * - O(1) append (average case) + * + ******************************************************************************) + +type 'a vec = { + mutable length: int; + mutable capacity: int; + mutable xs: 'a array; +} + +(****************************************************************************** + * Constructors + ******************************************************************************) + +let make (size : int) (seed : 'a) : 'a vec = { + length = size; + capacity = size; + xs = Array.make size seed; +} + +let create () = { + length = 0; + capacity = 0; + xs = [||]; +} + +let from_array (xs : 'a array) : 'a vec = { + length = Array.length xs; + capacity = Array.length xs; + xs = xs; +} + +let from_list (xs : 'a list) : 'a vec = + match xs with + | [] -> create () + | y::ys -> + let result = { + length = List.length xs; + capacity = List.length xs; + xs = Array.make (List.length xs) y; + } in + List.iteri (fun i x -> Array.set result.xs i x) xs; + result + +(****************************************************************************** + * Miscellaneous + ******************************************************************************) + +let append (x : 'a) (v : 'a vec) = + if v.capacity = 0 then + begin + v.length <- 1; + v.capacity <- 1; + v.xs <- [|x|]; + end + else if v.length = v.capacity then + begin + (* According to Wikipedia, Python uses 1.25 as the growth factor *) + let new_cap = v.capacity |> float_of_int |> Float.mul 1.25 |> ceil |> int_of_float in + let new_xs = Array.make new_cap x in + Array.iteri (fun i x -> Array.set new_xs i x) v.xs; + v.capacity <- new_cap; + v.xs <- new_xs; + Array.set v.xs v.length x; + v.length <- v.length + 1; + end + else + begin + Array.set v.xs v.length x; + v.length <- v.length + 1; + end + +let get (i : int) (v : 'a vec) : 'a option = + if i >= v.length then + None + else + Some v.xs.(i) + +let get_unsafe (i : int) (v : 'a vec) : 'a = + v.xs.(i) + +let set (i : int) (x : 'a) (v : 'a vec) : unit = + if i < v.length then + Array.set v.xs i x + +let length (v : 'a vec) : int = + v.length + +let update (i : int) (f : 'a -> 'a) (v : 'a vec) : unit = + match get i v with + | None -> () + | Some x -> set i (f x) v + +let iter (f : 'a -> unit) (v : 'a vec) : unit = + let n = ref 0 in + while !n < v.length do + f v.xs.(!n); + n := !n + 1; + done + +let join (sep : string) (v : string vec) : string = + if length v = 0 then + "" + else + let i = ref 1 in + let result = ref v.xs.(0) in + while !i < v.length do + result := !result ^ sep ^ v.xs.(!i); + i := !i + 1; + done; + !result + +let map (f : 'a -> 'b) (v : 'a vec) : 'b vec = + let result = create () in + iter (fun x -> append (f x) result) v; + result + +let append_to (dst : 'a vec) (xs : 'a vec) : unit = + iter (fun x -> append x dst) xs + diff --git a/users/wpcarro/scratch/groceries/shell.nix b/users/wpcarro/scratch/groceries/shell.nix index 7682e8246cac..0c6a298bf2b0 100644 --- a/users/wpcarro/scratch/groceries/shell.nix +++ b/users/wpcarro/scratch/groceries/shell.nix @@ -1,5 +1,5 @@ { depot, ... }: depot.users.wpcarro.buildHaskell.shell { - deps = hpkgs: []; + deps = hpkgs: [ ]; } diff --git a/users/wpcarro/scratch/picoctf/challenge_166/shell.nix b/users/wpcarro/scratch/picoctf/challenge_166/shell.nix index 07a3a2e281b4..85d3865a51bf 100644 --- a/users/wpcarro/scratch/picoctf/challenge_166/shell.nix +++ b/users/wpcarro/scratch/picoctf/challenge_166/shell.nix @@ -1,7 +1,8 @@ { pkgs, ... }: let - python =pkgs.python3.withPackages (pypkgs: with pypkgs; [ + python = pkgs.python3.withPackages (pypkgs: with pypkgs; [ cryptography ]); -in python.env +in +python.env diff --git a/users/wpcarro/scratch/rust/.gitignore b/users/wpcarro/scratch/rust/.gitignore new file mode 100644 index 000000000000..9f970225adb6 --- /dev/null +++ b/users/wpcarro/scratch/rust/.gitignore @@ -0,0 +1 @@ +target/ \ No newline at end of file diff --git a/users/wpcarro/scratch/rust/Cargo.lock b/users/wpcarro/scratch/rust/Cargo.lock new file mode 100644 index 000000000000..28aa1250cea4 --- /dev/null +++ b/users/wpcarro/scratch/rust/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "proc-macro2" +version = "1.0.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rust" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" diff --git a/users/wpcarro/scratch/rust/Cargo.toml b/users/wpcarro/scratch/rust/Cargo.toml new file mode 100644 index 000000000000..76235d11d37d --- /dev/null +++ b/users/wpcarro/scratch/rust/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "rust" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde_json = "1.0.81" +serde = { version = "1.0.137", features = ["derive"] } diff --git a/users/wpcarro/scratch/rust/README.md b/users/wpcarro/scratch/rust/README.md new file mode 100644 index 000000000000..9ff7dd97eaf6 --- /dev/null +++ b/users/wpcarro/scratch/rust/README.md @@ -0,0 +1,11 @@ +# Rust + +Watch me fumble around as I learn Rust. + +## Usage + +```shell +$ nix-shell /depot -A users.wpcarro.scratch.rust +$ cargo new json && cd ./json +$ cargo run json +``` diff --git a/users/wpcarro/scratch/rust/shell.nix b/users/wpcarro/scratch/rust/shell.nix new file mode 100644 index 000000000000..98e2dbf4b29b --- /dev/null +++ b/users/wpcarro/scratch/rust/shell.nix @@ -0,0 +1,7 @@ +{ pkgs ? import <nixpkgs> { }, ... }: + +pkgs.mkShell { + buildInputs = [ + pkgs.cargo + ]; +} diff --git a/users/wpcarro/scratch/rust/src/display/mod.rs b/users/wpcarro/scratch/rust/src/display/mod.rs new file mode 100644 index 000000000000..838463109190 --- /dev/null +++ b/users/wpcarro/scratch/rust/src/display/mod.rs @@ -0,0 +1,13 @@ +use std::fmt; + +pub struct Person { + pub fname: String, + pub lname: String, + pub age: i8, +} + +impl fmt::Display for Person { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}, {} ({} years old)", self.lname, self.fname, self.age) + } +} diff --git a/users/wpcarro/scratch/rust/src/json/mod.rs b/users/wpcarro/scratch/rust/src/json/mod.rs new file mode 100644 index 000000000000..d3307b394ea4 --- /dev/null +++ b/users/wpcarro/scratch/rust/src/json/mod.rs @@ -0,0 +1,81 @@ +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +// From the serde_json docs: +// +// > There are three common ways that you might find yourself needing to work +// > with JSON data in Rust. +// > +// > 1. As text data. An unprocessed string of JSON data that you receive on an +// > HTTP endpoint, read from a file, or prepare to send to a remote server. +// > 2. As an untyped or loosely typed representation. Maybe you want to check +// > that some JSON data is valid before passing it on, but without knowing +// > the structure of what it contains. Or you want to do very basic +// > manipulations like insert a key in a particular spot. +// > 3. As a strongly typed Rust data structure. When you expect all or most of +// > your data to conform to a particular structure and want to get real work +// > done without JSON’s loosey-goosey nature tripping you up. +// +// So let's take a look at all three... + +//////////////////////////////////////////////////////////////////////////////// +// Types +//////////////////////////////////////////////////////////////////////////////// + +#[derive(Serialize, Deserialize, Debug)] +struct Person { + fname: String, + lname: String, + age: u8, +} + +//////////////////////////////////////////////////////////////////////////////// +// Functions +//////////////////////////////////////////////////////////////////////////////// + +// 1) Reading/writing from/to plain text. +// TL;DR: +// - read: serde_json::from_str(data) +// - write: x.to_string() +pub fn one() { + let data = json!({ + "fname": "William", + "lname": "Carroll", + "age": 30, + }) + .to_string(); + + println!("result: {:?}", data); +} + +// 2) Parse into a loosely typed representation; mutate it; serialize it back. +// TL;DR: +// - read: serde_json::from_str(data) +// - write: x.to_string() +pub fn two() { + let data = r#"{"fname":"William","lname":"Carroll","age":30}"#; + + let mut parsed: Value = serde_json::from_str(data).unwrap(); + parsed["fname"] = json!("Norm"); + parsed["lname"] = json!("Macdonald"); + parsed["age"] = json!(61); + + let result = parsed.to_string(); + println!("result: {:?}", result); +} + +// 3) Parse into a strongly typed structure. +// TL;DR: +// - read: serde_json::from_str(data) +// - write: serde_json::to_string(x).unwrap() +pub fn three() { + let data = r#"{"fname":"William","lname":"Carroll","age":30}"#; + + let mut read: Person = serde_json::from_str(data).unwrap(); + read.fname = "Norm".to_string(); + read.lname = "Macdonald".to_string(); + read.age = 61; + + let write = serde_json::to_string(&read).unwrap(); + println!("result: {:?}", write); +} diff --git a/users/wpcarro/scratch/rust/src/main.rs b/users/wpcarro/scratch/rust/src/main.rs new file mode 100644 index 000000000000..671b33093050 --- /dev/null +++ b/users/wpcarro/scratch/rust/src/main.rs @@ -0,0 +1,15 @@ +use serde::{Deserialize, Serialize}; +use serde_json::{json, Value}; + +mod display; +mod json; +mod rc; +mod stdin; + +//////////////////////////////////////////////////////////////////////////////// +// Main +//////////////////////////////////////////////////////////////////////////////// + +fn main() { + rc::example(); +} diff --git a/users/wpcarro/scratch/rust/src/rc/mod.rs b/users/wpcarro/scratch/rust/src/rc/mod.rs new file mode 100644 index 000000000000..67251ca6aa9b --- /dev/null +++ b/users/wpcarro/scratch/rust/src/rc/mod.rs @@ -0,0 +1,12 @@ +// Playing around with Rust's "smart pointers". Starting off with a wrapper type +// that allows multiple readers (owners?) of some data. + +use std::rc::Rc; + +pub fn example() { + let five = Rc::new(5); + let x = Rc::clone(&five); + let y = Rc::clone(&five); + let z = Rc::clone(&five); + println!("result: {}", *x + *y + *z) +} diff --git a/users/wpcarro/scratch/rust/src/stdin/mod.rs b/users/wpcarro/scratch/rust/src/stdin/mod.rs new file mode 100644 index 000000000000..4be95afa4547 --- /dev/null +++ b/users/wpcarro/scratch/rust/src/stdin/mod.rs @@ -0,0 +1,22 @@ +use std::io::Write; +use std::process::{Command, Stdio}; + +// Example of piping-in a string defined in Rust to a shell command. +pub fn example() { + let input = "Hello, world!"; + + let mut cat = Command::new("cat") + .stdin(Stdio::piped()) + .spawn() + .ok() + .unwrap(); + + cat.stdin + .take() + .unwrap() + .write_all(&input.as_bytes()) + .unwrap(); + + let output = cat.wait_with_output().unwrap(); + println!("{}", String::from_utf8_lossy(&output.stdout)); +} diff --git a/users/wpcarro/scratch/simple-select/README.md b/users/wpcarro/scratch/simple-select/README.md new file mode 100644 index 000000000000..69e5707302ab --- /dev/null +++ b/users/wpcarro/scratch/simple-select/README.md @@ -0,0 +1,71 @@ +# Simple Select + +- Simple Select is a less expressive but more ergonomic query language for + tabular data than SQL. +- `slx` is a command-line tool for querying CSVs using the Simple Select query + language. + +Simple Select queries look like this: `director:"Tarantino" OR director:"Scorsese"`. + +## Example + +Say we have the following data in a CSV: + +```csv +title,year,rating,director +"Spirited Away",2001,8.5,"Hayao Miyazaki" +Andhadhun,2018,8.1,"Sriram Raghavan" +Dangal,2016,8.3,"Sriram Raghavan" +"Avengers: Infinity War",2019,8.4,"Anthony Russo" +Alien,1979,8.4,"Ridley Scott" +... +``` + +We can invoke `slx` like so... + +``` +$ slx -f /tmp/movies.csv +``` + +...and then query using the REPL: + +``` +> director:/S.*m/ OR director:"Hayao" +Andhadhun 2018 8.1 1 Sriram Raghavan 0 1 +Dangal 2016 8.3 1 Sriram Raghavan 0 1 +Howls Moving Castle 2004 8.2 0 Hayao Miyazaki 1 1 +Judgment at Nuremberg 1961 8.1 0 Stanley Kramer 0 0 +Laputa: Castle in the Sky 1986 8.0 0 Hayao Miyazaki 1 1 +Nausicaa of the Valley of the Wind 1984 8.0 0 Hayao Miyazaki 1 1 +Network 1976 8.1 0 Sidney Lumet 0 0 +``` + +## Warning + +Simple Select is **not intended for production use**. I wrote this as a toy +project for my own consumption. There are quite a few bugs of which I'm aware +and quite a few other features that I'd like to support but haven't had time to +support just yet. + +Why publish it then? Maybe this project will inspire drive-by contributions or +other, better-implemented spin-offs. + +## Wish List + +Speaking of drive-by contributions, here are some things that I'd like to +support: + +- Implicit `AND` conjunctions (`director:/Tarantino/ year:"2000"` instead of + `director:/Tarantino/ AND year:"2000"`) +- Support for types like numbers, dates (`year:2000` instead of `year:"2000"`) +- `slx` should support CSV *and* (at the very least) sqlite3 file formats (open + to other formats as well) +- Regexes should be the default query primitive (`director:Tarantino` instead of + `director:/Tarantino/`) +- Improve parsing errors (including surfacing errors to the user) +- Support for reading from `STDIN` and issuing queries from the command-line +- Unit-testing +- Configurable delimiters for output data (right now it's just `\t`) +- (Maybe) rewrite in a faster, more-type-safe languages (e.g. Rust) + +I'm likely missing other FRs, bugs, so please file issues! diff --git a/users/wpcarro/scratch/simple-select/main.py b/users/wpcarro/scratch/simple-select/main.py new file mode 100644 index 000000000000..3ae6c5d60e08 --- /dev/null +++ b/users/wpcarro/scratch/simple-select/main.py @@ -0,0 +1,262 @@ +from argparse import ArgumentParser + +import csv +from parser import Parser +import sqlite3 +import string +from scanner import Scanner +import re +import readline + +################################################################################ +# Predicates +################################################################################ + +def is_alpha(c): + return c in string.ascii_letters + +def is_digit(c): + return c in "0123456789" + +def is_alphanumeric(c): + return is_alpha(c) or is_digit(c) + +def is_whitespace(c): + return c in " \r\t\n" + +################################################################################ +# Tokenizer +################################################################################ + +AND = ("CONJUNCTION", "AND") +OR = ("CONJUNCTION", "OR") +NOT = ("PUNCTUATION", "NOT") +COLON = ("PUNCTUATION", "COLON") +LPAREN = ("PUNCTUATION", "LPAREN") +RPAREN = ("PUNCTUATION", "RPAREN") + +def tokenize(x): + s = Scanner(x) + tokens = scan_tokens(s) + return tokens + +def scan_tokens(s): + result = [] + while not s.exhausted(): + if is_whitespace(s.peek()): + s.advance() + else: + result.append(scan_token(s)) + return result + +def scan_token(s): + punctuation = { + "-": NOT, + ":": COLON, + "(": LPAREN, + ")": RPAREN, + } + c = s.peek() + if c in punctuation: + s.advance() + return punctuation[c] + if c == "\"": + return tokenize_string(s) + if c == "/": + return tokenize_regex(s) + if is_alpha(c): + return tokenize_identifier(s) + +def tokenize_string(s): + s.advance() # ignore opening 2x-quote + current = "" + while s.peek() != "\"" and not s.exhausted(): + current += s.advance() + if s.exhausted(): + raise Exception("Unterminated string") + s.advance() # ignore closing 2x-quote + return ("STRING", current) + +def tokenize_regex(s): + s.advance() # ignore opening forward-slash + current = "" + while s.peek() != "/" and not s.exhausted(): + current += s.advance() + if s.exhausted(): + raise Exception("Unterminated regex") + s.advance() # ignore closing forward-slash + return ("REGEX", current) + +def tokenize_identifier(s): + conjunctions = { + "AND", + "OR", + } + current = s.advance() + while is_alphanumeric(s.peek()): + current += s.advance() + if current.upper() in conjunctions: + return ("CONJUNCTION", current.upper()) + else: + return ("IDENTIFIER", current) + +################################################################################ +# Parser +################################################################################ + +# EBNF +# Note: we order expression types by ascending levels of precedence. +# +# expression -> conjunction ; +# conjunction -> selection ( ( "AND" | "OR" )? selection )* ; +# selection -> "-"? IDENTIFIER ":" ( REGEX | STRING ) | grouping ; +# grouping -> REGEX | STRING | "(" expression ")" ; + +def parse(x): + tokens = tokenize(x) + p = Parser(tokens) + return expression(p) + +def expression(p): + return conjunction(p) + +def conjunction(p): + lhs = selection(p) + + # TODO(wpcarro): Support default AND conjuctions when they're undefined. + while not p.exhausted() and p.match({AND, OR}): + conj = p.peek(n=-1) + rhs = selection(p) + lhs = ("CONJUNCTION", conj[1], lhs, rhs) + + return lhs + +def selection(p): + negate = False + if p.peek() == NOT: + negate = True + p.advance() + + if p.peek()[0] != "IDENTIFIER": + return grouping(p) + + ident = p.expect(lambda x: x[0] == "IDENTIFIER") + colon = p.expect(lambda x: x[1] == "COLON") + value = p.expect(lambda x: x[0] in {"REGEX", "STRING"}) + return ("SELECTION", negate, ident[1], value) + +def grouping(p): + if p.peek()[0] == "REGEX": + return p.advance() + + if p.peek()[0] == "STRING": + return p.advance() + + if p.peek() == LPAREN: + p.advance() + expr = expression(p) + p.expect(lambda x: x == RPAREN) + return ("GROUPING", expr) + +################################################################################ +# Compiler +################################################################################ + +def compile(source, table, columns): + ast = parse(source) + return "SELECT * FROM {} WHERE {};".format(table, do_compile(ast, columns)) + +def do_compile(ast, columns): + if ast[0] == "REGEX": + cols = "({})".format(" || ".join(columns)) + return "{} REGEXP '.*{}.*'".format(cols, ast[1]) + + if ast[0] == "STRING": + cols = "({})".format(" || ".join(columns)) + return "{} LIKE '%{}%'".format(cols, ast[1]) + + if ast[0] == "SELECTION": + return compile_selection(ast) + + if ast[0] == "CONJUNCTION": + _, conj, lhs, rhs = ast + lhs = do_compile(lhs, columns) + rhs = do_compile(rhs, columns) + return "{} {} {}".format(lhs, conj, rhs) + + if ast[0] == "GROUPING": + return "({})".format(do_compile(ast[1], columns)) + + raise Exception("Unexpected AST: \"{}\"".format(ast)) + +def compile_selection(ast): + _, negate, column, query = ast + match = compile_query(negate, query) + return "{} {}".format(column, match) + +def compile_query(negate, query): + query_type, query_string = query + if query_type == "REGEX": + if negate: + return "NOT REGEXP '.*{}.*'".format(query_string) + return "REGEXP '.*{}.*'".format(query_string) + + if query_type == "STRING": + if negate: + return "NOT LIKE '%{}%'".format(query_string) + return "LIKE '%{}%'".format(query_string) + +################################################################################ +# Helper Functions +################################################################################ + +def regexp(expr, x): + reg = re.compile(expr) + return reg.search(x) is not None + +################################################################################ +# Main +################################################################################ + +def main(csv_path=None, debug=False): + # Import CSV to SQLite + table = "main" + con = sqlite3.connect(":memory:") + + con.create_function("REGEXP", 2, regexp) + + cur = con.cursor() + with open(csv_path, "r") as f: + r = csv.DictReader(f) + columns = next(r).keys() + + # TODO(wpcarro): Use safer interpolation variant of "?" here and throughout. + cur.execute("CREATE TABLE {} ({});".format(table, ",".join(columns))) + rows = [tuple(row[col] for col in columns) for row in r] + cur.executemany("INSERT INTO {} ({}) VALUES ({});".format(table, ",".join(columns), ",".join("?" for _ in columns)), rows) + con.commit() + + while True: + x = input("> ") + + if debug: + print("tokens:\t{}".format(tokenize(x))) + print("AST:\t{}".format(parse(x))) + print("query:\t\"{}\"".format(compile(x, table, columns))) + + try: + compile(x, table, columns) + for row in cur.execute(compile(x, table, columns)): + print("\t".join(str(cell) for cell in row)) + except: + print("Compilation error.") + + # TODO(wpcarro): Trap exits and ensure cleanup always runs. + con.close() + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("-f", "--file", dest="file", help="Path to the CSV from which to read", metavar="PATH") + parser.add_argument("-d", "--debug", dest="debug", default=False, action="store_true", help="Enable debugging") + args = parser.parse_args() + main(csv_path=args.file, debug=args.debug) diff --git a/users/wpcarro/scratch/simple-select/parser.py b/users/wpcarro/scratch/simple-select/parser.py new file mode 100644 index 000000000000..d26f970e57a2 --- /dev/null +++ b/users/wpcarro/scratch/simple-select/parser.py @@ -0,0 +1,31 @@ +class Parser(object): + def __init__(self, tokens): + self.tokens = tokens + self.i = 0 + + def exhausted(self): + return self.i >= len(self.tokens) + + def peek(self, n=0): + return self.tokens[self.i + n] + + def advance(self): + if not self.exhausted(): + self.i += 1 + return self.peek(n=-1) + + def match(self, xs): + if self.peek() in xs: + self.advance() + return True + return False + + def test(self, predicate): + return predicate(self.tokens, self.i) + + def expect(self, predicate): + if self.exhausted(): + raise Exception("Unexpected EOL") + if predicate(self.peek()): + return self.advance() + raise Exception("Unexpected token: \"{}\"".format(self.peek())) diff --git a/users/wpcarro/scratch/simple-select/scanner.py b/users/wpcarro/scratch/simple-select/scanner.py new file mode 100644 index 000000000000..5dae68aee551 --- /dev/null +++ b/users/wpcarro/scratch/simple-select/scanner.py @@ -0,0 +1,27 @@ +# According to Crafting Interpreters, the only two primitives that a +# scanner/lexer needs are peek and advance; other functions (e.g. match) are +# nice-to-haves. +class Scanner(object): + def __init__(self, chars): + self.i = 0 + self.chars = chars + + def exhausted(self): + return self.i >= len(self.chars) + + def peek(self, n=0): + return self.chars[self.i + n] if self.i in range(0, len(self.chars)) else '\0' + + def advance(self): + result = self.peek() + self.i += 1 + return result + + def match(self, x): + if self.exhausted(): + return False + if self.peek() == x: + self.advance() + return True + else: + return False |