diff options
Diffstat (limited to 'users/wpcarro/scratch/compiler')
-rw-r--r-- | users/wpcarro/scratch/compiler/.envrc | 3 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/.gitignore | 5 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/debug.ml | 66 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/expr_parser.ml | 187 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/inference.ml | 183 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/parser.ml | 47 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/prettify.ml | 9 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/register_vm.ml | 129 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/register_vm.py | 161 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/shell.nix | 9 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/tests.ml | 43 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/type_parser.ml | 104 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/types.ml | 31 | ||||
-rw-r--r-- | users/wpcarro/scratch/compiler/vec.ml | 127 |
14 files changed, 1104 insertions, 0 deletions
diff --git a/users/wpcarro/scratch/compiler/.envrc b/users/wpcarro/scratch/compiler/.envrc new file mode 100644 index 000000000000..ff7eea1f7a05 --- /dev/null +++ b/users/wpcarro/scratch/compiler/.envrc @@ -0,0 +1,3 @@ +source_up + +use_nix diff --git a/users/wpcarro/scratch/compiler/.gitignore b/users/wpcarro/scratch/compiler/.gitignore new file mode 100644 index 000000000000..96261d3fc7e9 --- /dev/null +++ b/users/wpcarro/scratch/compiler/.gitignore @@ -0,0 +1,5 @@ +a.out +*.cmi +*.cmo +*.cmx +*.o \ No newline at end of file diff --git a/users/wpcarro/scratch/compiler/debug.ml b/users/wpcarro/scratch/compiler/debug.ml new file mode 100644 index 000000000000..e39ff13742be --- /dev/null +++ b/users/wpcarro/scratch/compiler/debug.ml @@ -0,0 +1,66 @@ +open Types + +(* Print x prefixed with tag and return x unchanged. *) +let print (f : 'a -> string) (tag : string) (x : 'a) : 'a = + Printf.printf "%s: %s\n" tag (f x); + x + +let rec ast (tree : Types.value) : string = + match tree with + | ValueLiteral (LiteralBool x) -> + Printf.sprintf "ValueLiteral (LiteralBool %s)" (string_of_bool x) + | ValueLiteral (LiteralInt x) -> + Printf.sprintf "ValueLiteral (LiteralInt %s)" (string_of_int x) + | ValueVariable x -> + Printf.sprintf "ValueVariable %s" x + | ValueFunction (x, body) -> + Printf.sprintf "ValueFunction (%s, %s)" x (ast body) + | ValueApplication (f, x) -> + Printf.sprintf "ValueApplication (%s, %s)" (ast f) (ast x) + | ValueVarApplication (f, x) -> + Printf.sprintf "ValueVarApplication (%s, %s)" f (ast x) + | ValueBinder (k, v, x) -> + Printf.sprintf "ValueBinder (%s, %s, %s)" k (ast v) (ast x) + +let rec value (x : value) : string = + match x with + | ValueLiteral (LiteralInt x) -> + Printf.sprintf "Int %d" x + | ValueLiteral (LiteralBool x) -> + Printf.sprintf "Bool %b" x + | ValueVariable x -> + Printf.sprintf "Var %s" x + | ValueFunction (name, x) -> + Printf.sprintf "Fn %s %s" name (value x) + | ValueApplication (f, x) -> + Printf.sprintf "App %s %s" (value f) (value x) + | ValueVarApplication (name, x) -> + Printf.sprintf "App %s %s" name (value x) + | ValueBinder (name, x, body) -> + Printf.sprintf "Bind %s %s %s" name (value x) (value body) + +let rec type' (t : _type) : string = + match t with + | TypeInt -> "Integer" + | TypeBool -> "Boolean" + | TypeVariable k -> Printf.sprintf "%s" k + | TypeArrow (a, b) -> Printf.sprintf "%s -> %s" (type' a) (type' b) + +let quantified_type (q : quantified_type) : string = + let QuantifiedType (vars, t) = q in + if List.length vars == 0 then + Printf.sprintf "%s" (type' t) + else + Printf.sprintf "forall %s. %s" (String.concat "," vars) (type' t) + +let substitution (s : substitution) : string = + FromString.fold (fun k v acc -> Printf.sprintf "%s\"%s\" |-> %s;" acc k (type' v)) s "" + |> Printf.sprintf "{ %s }" + +let env (s : env) : string = + FromString.fold (fun k v acc -> Printf.sprintf "%s\"%s\" |-> %s;" acc k (quantified_type v)) s "" + |> Printf.sprintf "{ %s }" + +let inference (Inference (s, t)) = + Printf.sprintf "type: %s; sub: %s" (type' t) (substitution s) + diff --git a/users/wpcarro/scratch/compiler/expr_parser.ml b/users/wpcarro/scratch/compiler/expr_parser.ml new file mode 100644 index 000000000000..797592931a2c --- /dev/null +++ b/users/wpcarro/scratch/compiler/expr_parser.ml @@ -0,0 +1,187 @@ +(******************************************************************************* + * CLI REPL for an s-expression Lambda Calculus. + * + * Lambda Calculus Expression Language: + * + * Helpers: + * symbol -> [-a-z]+ + * string -> '"' [^"]* '"' + * boolean -> 'true' | 'false' + * integer -> [1-9][0-9]* + * + * Core: + * expression -> funcdef + * binding -> '(' 'let' symbol expr expr ')' + * funcdef -> '(' 'fn' symbol expr ')' + * funccall -> '(' ( symbol | funcdef) expr ')' + * literal -> string | boolean | integer + * variable -> symbol + * + * Example Usage: + * $ ocamlopt types.ml str.cmxa inference.ml parser.ml expr_parser.ml && ./a.out + * repl> true + * tokens: [ "true" ] + * ast: ValueLiteral (LiteralBool true) + * Boolean + * repl> + * + ******************************************************************************) + +open Parser +open Inference +open Debug +open Prettify +open Vec + +type literal = LiteralBool of bool | LiteralInt of int + +let ( let* ) = Option.bind +let map = Option.map + +let tokenize (x : string) : token vec = + let xs = Vec.create () in + let i = ref 0 in + while !i < String.length x do + match x.[!i] with + | ' ' -> i := !i + 1 + (* strings *) + | '"' -> + let curr = ref "\"" in + i := !i + 1; + while x.[!i] != '"' do + curr := !curr ^ "?"; + i := !i + 1 + done; + curr := !curr ^ "\""; + Vec.append !curr xs; + i := !i + 1 + | '(' -> + Vec.append "(" xs; + i := !i + 1 + | ')' -> + Vec.append ")" xs; + i := !i + 1 + | _ -> + let token = ref "" in + while !i < String.length x && not (String.contains "() " x.[!i]) do + token := !token ^ String.make 1 x.[!i]; + i := !i + 1 + done; + Vec.append !token xs + done; + xs + +let parse_symbol (p : parser) : string option = + let* x = p#curr in + if Str.string_match (Str.regexp "[-a-z][0-9]*") x 0 then + begin + p#advance; + Some x + end + else + None + +let parse_variable (p : parser) : Types.value option = + let* x = parse_symbol p in + Some (Types.ValueVariable x) + +let parse_literal (p : parser) : Types.value option = + match p#curr with + | Some "true" -> + p#advance; + Some (ValueLiteral (LiteralBool true)) + | Some "false" -> + p#advance; + Some (ValueLiteral (LiteralBool false)) + | Some x -> + (match int_of_string_opt x with + | Some n -> + p#advance; + Some (ValueLiteral (LiteralInt n)) + | _ -> + if String.starts_with ~prefix:"\"" x then + begin + p#advance; + Some (ValueLiteral (LiteralString x)) + end + else + parse_variable p) + | _ -> None + +let rec parse_expression (p : parser) : Types.value option = + parse_binding p + +and parse_funccall (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "(") -> + p#advance; + let* f = parse_funcdef p in + let* x = parse_expression p in + p#expect ")"; + Some (Types.ValueApplication (f, x)) + | (Some "(", _) -> + p#advance; + let* f = parse_symbol p in + let* x = parse_expression p in + p#expect ")"; + Some (Types.ValueVarApplication (f, x)) + | _ -> parse_literal p + +and parse_funcdef (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "fn") -> + p#advance; + p#advance; + let* name = parse_symbol p in + let* body = parse_expression p in + p#expect ")"; + Some (Types.ValueFunction (name, body)) + | _ -> parse_funccall p + +and parse_binding (p : parser) : Types.value option = + match (p#curr, p#next) with + | (Some "(", Some "let") -> + p#advance; + p#advance; + let* name = parse_symbol p in + let* value = parse_expression p in + let* body = parse_expression p in + Some (Types.ValueBinder (name, value, body)) + | _ -> parse_funcdef p + +let print_tokens (xs : string vec) : unit = + xs + |> Vec.map (Printf.sprintf "\"%s\"") + |> Vec.join ", " + |> Printf.sprintf "tokens: [ %s ]" + |> print_string + |> print_newline + +let parse_language (x : string) : Types.value option = + let tokens = tokenize x in + print_tokens tokens; + parse_expression (new parser tokens) + +let main = + while true do + begin + print_string "repl> "; + let x = read_line () in + match parse_language x with + | Some ast -> + (match ast |> Debug.print Debug.ast "ast" |> do_infer with + | None -> + "Type-check failed" + |> print_string + |> print_newline + | Some x -> + x + |> Prettify.type' + |> print_string + |> print_newline) + | None -> + "Could not parse" + |> print_string + |> print_newline + end + done diff --git a/users/wpcarro/scratch/compiler/inference.ml b/users/wpcarro/scratch/compiler/inference.ml new file mode 100644 index 000000000000..e00904a09eed --- /dev/null +++ b/users/wpcarro/scratch/compiler/inference.ml @@ -0,0 +1,183 @@ +(******************************************************************************* + * WIP implementation of the Hindley-Milner type system primarily for learning + * purposes. + * + * Wish List: + * - TODO Debug this inference (let f (fn x x) f) + ******************************************************************************) + +open Types +open Debug + +(******************************************************************************* + * Library + ******************************************************************************) + +let ( let* ) = Option.bind + +let set_from_list (xs : string list) : set = + xs |> List.fold_left (fun acc x -> FromString.add x true acc) FromString.empty + +(* Map union that favors the rhs values (i.e. "last writer wins"). *) +let lww (xs : 'a FromString.t) (ys : 'a FromString.t) : 'a FromString.t = + FromString.union (fun k x y -> Some y) xs ys + +let emptyEnv : env = FromString.empty + +let rec free_type_vars (t : _type) : set = + match t with + | TypeVariable k -> FromString.singleton k true + | TypeInt -> FromString.empty + | TypeBool -> FromString.empty + | TypeString -> FromString.empty + | TypeArrow (a, b) -> lww (free_type_vars a) (free_type_vars b) + +let i : int ref = ref 0 + +let make_type_var () : _type = + let res = Printf.sprintf "a%d" !i in + i := !i + 1; + TypeVariable res + +exception OccursCheck + +let bind_var (k : string) (t : _type) : substitution = + if t == TypeVariable k then FromString.empty + else if FromString.exists (fun name _ -> name == k) (free_type_vars t) then + raise OccursCheck + else FromString.singleton k t + +let rec instantiate (q : quantified_type) : _type = + let (QuantifiedType (names, t)) = q in + match t with + | TypeInt -> TypeInt + | TypeBool -> TypeBool + | TypeString -> TypeString + | TypeVariable k -> + if List.exists (( == ) k) names then make_type_var () else TypeVariable k + | TypeArrow (a, b) -> + TypeArrow + (instantiate (QuantifiedType (names, a)), instantiate (QuantifiedType (names, b))) + +let quantified_type_ftvs (q : quantified_type) : set = + let (QuantifiedType (names, t)) = q in + lww (free_type_vars t) (names |> set_from_list) + +let generalize (env : env) (t : _type) : quantified_type = + let envftv = + env |> FromString.bindings + |> List.map (fun (_, v) -> quantified_type_ftvs v) + |> List.fold_left lww FromString.empty + in + let names = + lww (free_type_vars t) envftv + |> FromString.bindings + |> List.map (fun (k, _) -> k) + in + QuantifiedType (names, t) + +let rec substitute_type (s : substitution) (t : _type) : _type = + match t with + | TypeVariable k as tvar -> + (match FromString.find_opt k s with + | Some v -> substitute_type s v + | None -> tvar) + | TypeArrow (a, b) -> TypeArrow (substitute_type s a, substitute_type s b) + | TypeInt -> TypeInt + | TypeBool -> TypeBool + | TypeString -> TypeString + +let substitute_quantified_type (s : substitution) (q : quantified_type) : quantified_type = + let (QuantifiedType (names, t)) = q in + let s1 = + FromString.filter (fun k v -> List.exists (fun x -> k != x) names) s + in + QuantifiedType (names, substitute_type s1 t) + +let substitute_env (s : substitution) (env : env) : env = + FromString.map (fun q -> substitute_quantified_type s q) env + +let compose_substitutions (xs : substitution list) : substitution = + let do_compose_substitutions s1 s2 = lww s2 (FromString.map (substitute_type s2) s1) in + List.fold_left do_compose_substitutions FromString.empty xs + +let rec unify (a : _type) (b : _type) : substitution option = + match (a, b) with + | TypeInt, TypeInt -> Some FromString.empty + | TypeBool, TypeBool -> Some FromString.empty + | TypeString, TypeString -> Some FromString.empty + | TypeVariable k, _ -> Some (bind_var k b) + | _, TypeVariable k -> Some (bind_var k a) + | TypeArrow (a, b), TypeArrow (c, d) -> + let* s1 = unify a c in + let* s2 = unify (substitute_type s1 b) (substitute_type s1 d) in + let s3 = compose_substitutions [s1; s2] in + s1 |> Debug.substitution |> Printf.sprintf "s1: %s\n" |> print_string; + s2 |> Debug.substitution |> Printf.sprintf "s2: %s\n" |> print_string; + s3 |> Debug.substitution |> Printf.sprintf "s3: %s\n" |> print_string; + Some s3 + | _ -> None + +let print_env (env : env) = + Printf.sprintf "env: %s\n" (Debug.env env) + |> print_string + +let print_val (x : value) = + Printf.sprintf "val: %s\n" (Debug.value x) + |> print_string + +let print_inference (x : inference option) = + match x with + | None -> "no inference\n" |> print_string + | Some x -> + Printf.sprintf "inf: %s\n" (Debug.inference x) + |> print_string + +let rec infer (env : env) (x : value) : inference option = + print_env env; + print_val x; + let res = match x with + | ValueLiteral lit -> ( + match lit with + | LiteralInt _ -> Some (Inference (FromString.empty, TypeInt)) + | LiteralBool _ -> Some (Inference (FromString.empty, TypeBool)) + | LiteralString _ -> Some (Inference (FromString.empty, TypeString))) + | ValueVariable k -> + let* v = FromString.find_opt k env in + Some (Inference (FromString.empty, instantiate v)) + | ValueFunction (param, body) -> + let typevar = make_type_var () in + let env1 = FromString.remove param env in + let env2 = lww (FromString.singleton param (QuantifiedType ([], typevar))) env1 in + let* (Inference (s1, t1)) = infer env2 body in + Some (Inference (s1, TypeArrow (substitute_type s1 typevar, t1))) + | ValueApplication (f, x) -> + let result = make_type_var () in + let* (Inference (s1, t1)) = infer env f in + let* (Inference (s2, t2)) = infer (substitute_env s1 env) x in + let* s3 = unify (substitute_type s2 t1) (TypeArrow (t2, result)) in + Some (Inference + ( compose_substitutions [s3; s2; s1], + substitute_type s3 result )) + | ValueVarApplication (name, x) -> + let* v = FromString.find_opt name env in + let t1 = instantiate v in + let typevar = make_type_var () in + let* (Inference (s2, t2)) = infer env x in + let* s3 = unify (substitute_type s2 t1) (TypeArrow (t2, typevar)) in + Some (Inference + ( compose_substitutions [s2; s3], + substitute_type s3 typevar )) + | ValueBinder (k, v, body) -> + let* (Inference (s1, t1)) = infer env v in + let env1 = FromString.remove k env in + let tg = generalize (substitute_env s1 env) t1 in + let env2 = FromString.add k tg env1 in + let* (Inference (s2, t2)) = infer (substitute_env s1 env2) body in + Some (Inference (compose_substitutions [s1; s2], t2)) in + print_inference res; + res + +let do_infer (x : value) : _type option = + let* Inference (_, t) = infer FromString.empty x in + Some t diff --git a/users/wpcarro/scratch/compiler/parser.ml b/users/wpcarro/scratch/compiler/parser.ml new file mode 100644 index 000000000000..dc66f2506ed3 --- /dev/null +++ b/users/wpcarro/scratch/compiler/parser.ml @@ -0,0 +1,47 @@ +(****************************************************************************** + * Defines a generic parser class. + ******************************************************************************) + +open Vec + +exception ParseError of string + +type token = string +type state = { i : int; tokens : token vec } + +class parser (tokens : token vec) = + object (self) + val mutable tokens = tokens + val mutable i = ref 0 + + method advance = i := !i + 1 + method prev : token option = Vec.get (!i - 1) tokens + method curr : token option = Vec.get !i tokens + method next : token option = Vec.get (!i + 1) tokens + + method consume : token option = + match self#curr with + | None -> None + | Some x as res -> + self#advance; + res + + method expect (x : token) = + match self#curr with + | Some y when x = y -> self#advance + | _ -> raise (ParseError (Printf.sprintf "Expected %s" x)) + + method matches (x : token) : bool = + match self#curr with + | None -> false + | Some y -> + if x = y then + begin + self#advance; + true + end + else false + + method exhausted : bool = !i >= Vec.length tokens + method state : state = { i = !i; tokens } + end diff --git a/users/wpcarro/scratch/compiler/prettify.ml b/users/wpcarro/scratch/compiler/prettify.ml new file mode 100644 index 000000000000..7903ad36947c --- /dev/null +++ b/users/wpcarro/scratch/compiler/prettify.ml @@ -0,0 +1,9 @@ +open Types + +(* Pretty-print the type, t. *) +let rec type' (t : _type) : string = + match t with + | TypeInt -> "Integer" + | TypeBool -> "Boolean" + | TypeVariable k -> Printf.sprintf "%s" k + | TypeArrow (a, b) -> Printf.sprintf "%s -> %s" (type' a) (type' b) diff --git a/users/wpcarro/scratch/compiler/register_vm.ml b/users/wpcarro/scratch/compiler/register_vm.ml new file mode 100644 index 000000000000..0a573048e77e --- /dev/null +++ b/users/wpcarro/scratch/compiler/register_vm.ml @@ -0,0 +1,129 @@ +(* + Rewriting the Python implementation of the register VM in OCaml to see how + how much imperative/mutative programming OCaml allows. + + Note: Some of this code is intentionally not written in a functional style + because one of the goals was to see how similar this OCaml implementation + could be to the Python implementation. + + Conclusion: It's pretty easy to switch between the two languages. + + Usage: Recommended compilation settings I hastily found online: + $ ocamlopt -w +A-42-48 -warn-error +A-3-44 ./register_vm.ml && ./a.out + + Formatting: + $ ocamlformat --inplace --enable-outside-detected-project ./register_vm.ml + *) + +open Vec + +type reg = X | Y | Res +type binop = int -> int -> int + +type ast = + | Const of int + | Add of ast * ast + | Sub of ast * ast + | Mul of ast * ast + | Div of ast * ast + +type opcode0 = + | Op0AssignRegLit of reg * int + | Op0AssignRegReg of reg * reg + | Op0BinOp of binop * reg * reg * reg + | Op0PushReg of reg + | Op0PopAndSet of reg + | Op0Null + +type opcode1 = + | Op1AssignRegLit of int * int + | Op1AssignRegReg of int * int + | Op1BinOp of (int -> int -> int) * int * int * int + | Op1PushReg of int + | Op1PopAndSet of int + | Op1Null + +type opcodes0 = opcode0 vec +type opcodes1 = opcode1 vec + +let registers : int vec = Vec.make 8 0 +let stack : int Stack.t = Stack.create () +let reg_idx (r : reg) : int = match r with X -> 0 | Y -> 1 | Res -> 2 + +let reg_name (r : reg) : string = + match r with X -> "x" | Y -> "y" | Res -> "res" + +let print_opcodes0 (xs : opcodes0) : opcodes0 = + let print_opcode x = + match x with + | Op0AssignRegLit (r, x) -> Printf.printf "%s <- %d\n" (reg_name r) x + | Op0AssignRegReg (dst, src) -> + Printf.printf "%s <- $%s\n" (reg_name dst) (reg_name src) + | Op0PushReg src -> Printf.printf "push $%s\n" (reg_name src) + | Op0PopAndSet dst -> Printf.printf "%s <- pop\n" (reg_name dst) + | Op0BinOp (_, lhs, rhs, dst) -> + Printf.printf "%s <- $%s ? $%s\n" (reg_name dst) (reg_name lhs) + (reg_name rhs) + | Op0Null -> () + in + Vec.iter print_opcode xs; + xs + +let rec compile (ast : ast) : opcodes0 = + let result : opcodes0 = Vec.create () in + (match ast with + | Const x -> Vec.append (Op0AssignRegLit (Res, x)) result; + | Add (lhs, rhs) -> compile_bin_op ( + ) lhs rhs result + | Sub (lhs, rhs) -> compile_bin_op ( - ) lhs rhs result + | Mul (lhs, rhs) -> compile_bin_op ( * ) lhs rhs result + | Div (lhs, rhs) -> compile_bin_op ( / ) lhs rhs result); + result + +and compile_bin_op (f : binop) (lhs : ast) (rhs : ast) (result : opcodes0) = + lhs |> compile |> Vec.append_to result; + Vec.append (Op0PushReg Res) result; + rhs |> compile |> Vec.append_to result; + Vec.append (Op0PopAndSet X) result; + Vec.append (Op0AssignRegReg (Y, Res)) result; + Vec.append (Op0BinOp (f, X, Y, Res)) result + +let compile_registers (xs : opcodes0) : opcodes1 = + let do_compile x = + match x with + | Op0AssignRegLit (dst, x) -> Op1AssignRegLit (reg_idx dst, x) + | Op0AssignRegReg (dst, src) -> Op1AssignRegReg (reg_idx dst, reg_idx src) + | Op0PushReg src -> Op1PushReg (reg_idx src) + | Op0PopAndSet dst -> Op1PopAndSet (reg_idx dst) + | Op0BinOp (f, lhs, rhs, dst) -> Op1BinOp (f, reg_idx lhs, reg_idx rhs, reg_idx dst) + | Op0Null -> Op1Null + in + Vec.map do_compile xs + +let eval (xs : opcodes1) : int = + let ip = ref 0 in + while !ip < Vec.length xs do + match Vec.get_unsafe !ip xs with + | Op1AssignRegLit (dst, x) -> + Vec.set dst x registers; + ip := !ip + 1 + | Op1AssignRegReg (dst, src) -> + Vec.set dst (Vec.get_unsafe src registers) registers; + ip := !ip + 1 + | Op1PushReg src -> + Stack.push (Vec.get_unsafe src registers) stack; + ip := !ip + 1 + | Op1PopAndSet dst -> + Vec.set dst (Stack.pop stack) registers; + ip := !ip + 1 + | Op1BinOp (f, lhs, rhs, dst) -> + let lhs = Vec.get_unsafe lhs registers in + let rhs = Vec.get_unsafe rhs registers in + Vec.set dst (f lhs rhs) registers; + ip := !ip + 1 + | Op1Null -> ip := !ip + 1 + done; + Vec.get_unsafe (reg_idx Res) registers +;; + +Add (Mul (Const 2, Div (Const 100, Const 2)), Const 5) +|> compile |> print_opcodes0 |> compile_registers |> eval |> print_int diff --git a/users/wpcarro/scratch/compiler/register_vm.py b/users/wpcarro/scratch/compiler/register_vm.py new file mode 100644 index 000000000000..302bce5a0ecb --- /dev/null +++ b/users/wpcarro/scratch/compiler/register_vm.py @@ -0,0 +1,161 @@ +# Silly proof-of-concept register VM. + +def compile_binary_op(op, ast): + result = [] + for x in compile(ast[1]): + result.append(x) + result.append(PUSH_REG) + result.append(RES) + for x in compile(ast[2]): + result.append(x) + result.append(ASSIGN_REG_REG) + result.append(Y) + result.append(RES) + result.append(POP) + result.append(X) + result.append(op) + return result + +def compile(ast): + result = [] + + if ast[0] == 'CONST': + result.append(ASSIGN_REG_LIT) + result.append(RES) + result.append(ast[1]) + elif ast[0] == 'ADD': + result += compile_binary_op(ADD, ast) + elif ast[0] == 'SUB': + result += compile_binary_op(SUB, ast) + elif ast[0] == 'MUL': + result += compile_binary_op(MUL, ast) + elif ast[0] == 'DIV': + result += compile_binary_op(DIV, ast) + elif ast[0] == 'RETURN': + result.append(RETURN) + else: + raise Exception('Cannot compile unknown AST node: {}'.format(ast[0])) + + return result + +# opcodes +ASSIGN_REG_LIT = 0x0 +ASSIGN_REG_REG = 0x1 +ADD = 0x2 +SUB = 0x3 +MUL = 0x4 +DIV = 0x5 +SWAP = 0x6 +RETURN = 0x7 +PUSH_REG = 0x8 +POP = 0x9 + +# register indices +X = 0x0 +Y = 0x1 +RES = 0x2 + +registers = [0x0] * 8 +stack = [] + +def reg_name(i): + if i == X: return 'x' + if i == Y: return 'x' + if i == RES: return 'res' + +def print_instructions(xs): + i = 0 + + while i < len(xs): + if xs[i] == ASSIGN_REG_LIT: + # print('ASSIGN_REG_LIT {} {}'.format(reg_name(xs[i + 1]), xs[i + 2])) + print('{} <- {}'.format(reg_name(xs[i + 1]), xs[i + 2])) + i += 3 + elif xs[i] == ASSIGN_REG_REG: + # print('ASSIGN_REG_REG {} {}'.format(reg_name(xs[i + 1]), reg_name(xs[i + 2]))) + print('{} <- ${}'.format(reg_name(xs[i + 1]), reg_name(xs[i + 2]))) + i += 3 + elif xs[i] == ADD: + print('add') + i += 1 + elif xs[i] == SUB: + print('sub') + i += 1 + elif xs[i] == MUL: + print('mul') + i += 1 + elif xs[i] == DIV: + print('div') + i += 1 + elif xs[i] == PUSH_REG: + print('push ${}'.format(reg_name(xs[i + 1]))) + i += 2 + elif xs[i] == POP: + print('{} <- pop'.format(reg_name(xs[i + 1]))) + i += 2 + else: + raise Exception('Cannot print instruction: {}'.format(xs[i])) + +def eval(instructions): + print_instructions(instructions) + ip = 0 + cont = True + while ip < len(instructions): + if instructions[ip] == ASSIGN_REG_LIT: + r = instructions[ip + 1] + x = instructions[ip + 2] + registers[r] = x + ip += 3 + elif instructions[ip] == ASSIGN_REG_REG: + r_dst = instructions[ip + 1] + r_src = instructions[ip + 2] + registers[r_dst] = registers[r_src] + ip += 3 + elif instructions[ip] == ADD: + registers[RES] = registers[X] + registers[Y] + ip += 1 + elif instructions[ip] == MUL: + registers[RES] = registers[X] * registers[Y] + ip += 1 + elif instructions[ip] == SUB: + registers[RES] = registers[X] - registers[Y] + ip += 1 + elif instructions[ip] == MUL: + registers[RES] = registers[X] * registers[Y] + ip += 1 + elif instructions[ip] == DIV: + registers[RES] = registers[X] / registers[Y] + ip += 1 + elif instructions[ip] == SWAP: + r1 = instructions[ip + 1] + r2 = instructions[ip + 2] + registers[r1], registers[r2] = registers[r2], registers[r1] + ip += 3 + elif instructions[ip] == RETURN: + ip += 1 + cont = False + return registers[RES] + elif instructions[ip] == PUSH_REG: + src = instructions[ip + 1] + stack.append(registers[src]) + ip += 2 + elif instructions[ip] == POP: + dst = instructions[ip + 1] + registers[dst] = stack.pop() + ip += 2 + else: + raise Exception('Cannot eval instruction: {}'.format(instructions[ip])) + return registers[RES] + +def main(): + ast = ['ADD', + ['MUL', + ['MUL', ['CONST', 2], ['CONST', 3]], + ['DIV', ['CONST', 5], ['CONST', 5]]], + ['ADD', + ['SUB', ['CONST', 10], ['CONST', 1]], + ['MUL', ['CONST', 2], ['CONST', 2]]]] + + print('result: {}'.format(eval(compile(ast)))) + +main() diff --git a/users/wpcarro/scratch/compiler/shell.nix b/users/wpcarro/scratch/compiler/shell.nix new file mode 100644 index 000000000000..ec339eb91d98 --- /dev/null +++ b/users/wpcarro/scratch/compiler/shell.nix @@ -0,0 +1,9 @@ +{ pkgs, ... }: + +pkgs.mkShell { + buildInputs = with pkgs; [ + ocaml + ocamlPackages.utop + ocamlformat + ]; +} diff --git a/users/wpcarro/scratch/compiler/tests.ml b/users/wpcarro/scratch/compiler/tests.ml new file mode 100644 index 000000000000..828cbd16f090 --- /dev/null +++ b/users/wpcarro/scratch/compiler/tests.ml @@ -0,0 +1,43 @@ +open Expr_parser +open Type_parser +open Inference + +type test = { input : string; expect : string; } +(* type sub_test = { s1 : string; s2 : string; s3 : string } *) + +let ( let* ) = Option.bind + +let tests = [ + { input = "((fn x x) 10)"; expect = "Integer"; }; + { input = "(let f (fn x x) f)"; expect = "a -> a"; }; +] + +(* let sub_tests = [ *) +(* { *) +(* s1 = "{b |-> b -> Int}"; *) +(* s2 = "{a: Bool, b: Int, c: Bool}"; *) +(* s3 = "{a: Bool, b: Int -> Int, c: Bool}"; *) +(* } *) +(* ] *) + +exception FailedAssertion +exception TestError + +let main = + tests + |> List.iter (fun { input; expect } -> + Printf.sprintf ":t %s == %s\n" input expect |> print_string; + match (parse_language input, parse_input expect) with + | Some ast, Some expected -> + (match do_infer ast with + | Some actual -> + if actual != expected then + begin + print_type actual; + raise FailedAssertion + end + else + print_string "Test passed.\n" + | _ -> raise TestError) + | _ -> raise TestError); + print_string "All tests pass!" diff --git a/users/wpcarro/scratch/compiler/type_parser.ml b/users/wpcarro/scratch/compiler/type_parser.ml new file mode 100644 index 000000000000..99cc8bbc4f4e --- /dev/null +++ b/users/wpcarro/scratch/compiler/type_parser.ml @@ -0,0 +1,104 @@ +(****************************************************************************** + * Type Expression Language: + * + * Helpers: + * symbol -> [a-z] + * + * Core: + * type -> function + * function -> ( variable | literal ) '->' type + * literal -> 'Integer' | 'Boolean' + * variable -> symbol + ******************************************************************************) + +open Types +open Prettify +open Parser +open Inference +open Vec + +type side = LHS | RHS + +let ( let* ) = Option.bind + +let printsub (s : substitution) = + s |> Debug.substitution |> print_string |> print_newline + +let tokenize (x : string) : token vec = + let xs = Vec.create () in + let i = ref 0 in + while !i < String.length x do + match x.[!i] with + | ' ' -> i := !i + 1 + | _ -> + let beg = !i in + while (!i < String.length x) && (x.[!i] != ' ') do + i := !i + 1 + done; + Vec.append (String.sub x beg (!i - beg)) xs + done; + xs + +let rec parse_type (p : parser) : _type option = + parse_function p +and parse_function (p : parser) : _type option = + match p#next with + | Some "->" -> + let* a = parse_literal p in + p#advance; + let* b = parse_type p in + Some (TypeArrow (a, b)) + | _ -> parse_literal p +and parse_literal (p : parser) : _type option = + match p#curr with + | Some "Integer" | Some "Int" -> p#advance; Some TypeInt + | Some "Boolean" | Some "Bool" -> p#advance; Some TypeBool + | Some _ -> parse_variable p + | None -> None +and parse_variable (p : parser) : _type option = + match p#curr with + | Some x when String.length x = 1 -> p#advance; Some (TypeVariable x) + | _ -> None + +let print_tokens (xs : string vec) = + xs + |> Vec.map (Printf.sprintf "\"%s\"") + |> Vec.join ", " + |> Printf.sprintf "tokens: [ %s ]" + |> print_string + |> print_newline + +let print_type (t : _type) = + t |> Debug.type' |> Printf.sprintf "type: %s" |> print_string |> print_newline + +let parse_input (x : string) : _type option = + let tokens = tokenize x in + print_tokens tokens; + parse_type (new parser tokens) + +(* Continually prompt until user provides a parseable type expression *) +let rec read_type (arg : side) : _type = + let prompt = match arg with + | LHS -> "lhs> " + | RHS -> "rhs> " in + print_string prompt; + let x = read_line () in + match parse_input x with + | None -> + print_string "Failed to parse input.\n"; + read_type arg + | Some ast -> + print_type ast; + ast + +let main = + while true do + begin + let lhs = read_type LHS in + let rhs = read_type RHS in + match unify lhs rhs with + | None -> + Printf.printf "Cannot unify \"%s\" with \"%s\"\n" (Debug.type' lhs) (Debug.type' rhs) + | Some x -> printsub x + end + done diff --git a/users/wpcarro/scratch/compiler/types.ml b/users/wpcarro/scratch/compiler/types.ml new file mode 100644 index 000000000000..0acd05737cdc --- /dev/null +++ b/users/wpcarro/scratch/compiler/types.ml @@ -0,0 +1,31 @@ +type literal + = LiteralInt of int + | LiteralBool of bool + | LiteralString of string + +(* Lambda Calculus definition *) +type value = + | ValueLiteral of literal + | ValueVariable of string + | ValueFunction of string * value + | ValueApplication of value * value + | ValueVarApplication of string * value + | ValueBinder of string * value * value + +module FromString = Map.Make (String) + +type _type = + | TypeInt + | TypeBool + | TypeString + | TypeVariable of string + | TypeArrow of _type * _type + +type quantified_type = QuantifiedType of string list * _type + +type set = bool FromString.t +type substitution = _type FromString.t + +type env = quantified_type FromString.t + +type inference = Inference of substitution * _type diff --git a/users/wpcarro/scratch/compiler/vec.ml b/users/wpcarro/scratch/compiler/vec.ml new file mode 100644 index 000000000000..549078c5d87a --- /dev/null +++ b/users/wpcarro/scratch/compiler/vec.ml @@ -0,0 +1,127 @@ +(****************************************************************************** + * Similar to Python's list + * + * - mutable + * - dynamically resized + * - O(1) read + * - O(1) write + * - O(1) append (average case) + * + ******************************************************************************) + +type 'a vec = { + mutable length: int; + mutable capacity: int; + mutable xs: 'a array; +} + +(****************************************************************************** + * Constructors + ******************************************************************************) + +let make (size : int) (seed : 'a) : 'a vec = { + length = size; + capacity = size; + xs = Array.make size seed; +} + +let create () = { + length = 0; + capacity = 0; + xs = [||]; +} + +let from_array (xs : 'a array) : 'a vec = { + length = Array.length xs; + capacity = Array.length xs; + xs = xs; +} + +let from_list (xs : 'a list) : 'a vec = + match xs with + | [] -> create () + | y::ys -> + let result = { + length = List.length xs; + capacity = List.length xs; + xs = Array.make (List.length xs) y; + } in + List.iteri (fun i x -> Array.set result.xs i x) xs; + result + +(****************************************************************************** + * Miscellaneous + ******************************************************************************) + +let append (x : 'a) (v : 'a vec) = + if v.capacity = 0 then + begin + v.length <- 1; + v.capacity <- 1; + v.xs <- [|x|]; + end + else if v.length = v.capacity then + begin + (* According to Wikipedia, Python uses 1.25 as the growth factor *) + let new_cap = v.capacity |> float_of_int |> Float.mul 1.25 |> ceil |> int_of_float in + let new_xs = Array.make new_cap x in + Array.iteri (fun i x -> Array.set new_xs i x) v.xs; + v.capacity <- new_cap; + v.xs <- new_xs; + Array.set v.xs v.length x; + v.length <- v.length + 1; + end + else + begin + Array.set v.xs v.length x; + v.length <- v.length + 1; + end + +let get (i : int) (v : 'a vec) : 'a option = + if i >= v.length then + None + else + Some v.xs.(i) + +let get_unsafe (i : int) (v : 'a vec) : 'a = + v.xs.(i) + +let set (i : int) (x : 'a) (v : 'a vec) : unit = + if i < v.length then + Array.set v.xs i x + +let length (v : 'a vec) : int = + v.length + +let update (i : int) (f : 'a -> 'a) (v : 'a vec) : unit = + match get i v with + | None -> () + | Some x -> set i (f x) v + +let iter (f : 'a -> unit) (v : 'a vec) : unit = + let n = ref 0 in + while !n < v.length do + f v.xs.(!n); + n := !n + 1; + done + +let join (sep : string) (v : string vec) : string = + if length v = 0 then + "" + else + let i = ref 1 in + let result = ref v.xs.(0) in + while !i < v.length do + result := !result ^ sep ^ v.xs.(!i); + i := !i + 1; + done; + !result + +let map (f : 'a -> 'b) (v : 'a vec) : 'b vec = + let result = create () in + iter (fun x -> append (f x) result) v; + result + +let append_to (dst : 'a vec) (xs : 'a vec) : unit = + iter (fun x -> append x dst) xs + |