about summary refs log tree commit diff
path: root/users/wpcarro/scratch/compiler/expr_parser.ml
diff options
context:
space:
mode:
authorWilliam Carroll <wpcarro@gmail.com>2022-10-12T17·22-0700
committerwpcarro <wpcarro@gmail.com>2022-10-24T17·42+0000
commita8876a4cdaaf0a1f051fefad436e08d983b402e2 (patch)
treedfd6f25617691d866d340357e1b808c19ecfdc6a /users/wpcarro/scratch/compiler/expr_parser.ml
parent5bd0e723c13b8a41bfba442bd8ef5351e31099e6 (diff)
feat(wpcarro/scratch): Implement "Algorithm W" r/5194
I've been wanting to grok Haskell-style type inference for awhile, so instead of
just watching conference talks and reading papers about it, I've decided to
attempt to implement it to more readily test my understanding of it.

Change-Id: I69261202a3d74d55c6e38763d7ddfec73c392465
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6988
Tested-by: BuildkiteCI
Reviewed-by: wpcarro <wpcarro@gmail.com>
Autosubmit: wpcarro <wpcarro@gmail.com>
Diffstat (limited to 'users/wpcarro/scratch/compiler/expr_parser.ml')
-rw-r--r--users/wpcarro/scratch/compiler/expr_parser.ml195
1 files changed, 195 insertions, 0 deletions
diff --git a/users/wpcarro/scratch/compiler/expr_parser.ml b/users/wpcarro/scratch/compiler/expr_parser.ml
new file mode 100644
index 000000000000..7d24efc80de3
--- /dev/null
+++ b/users/wpcarro/scratch/compiler/expr_parser.ml
@@ -0,0 +1,195 @@
+(*******************************************************************************
+ * CLI REPL for an s-expression Lambda Calculus.
+ *
+ * Lambda Calculus Expression Language:
+ *
+ *   Helpers:
+ *     symbol     -> [-a-z]+
+ *     boolean    -> 'true' | 'false'
+ *     integer    -> [1-9][0-9]*
+ *
+ *   Core:
+ *     expression -> funcdef
+ *     binding    -> '(' 'let' symbol expr expr ')'
+ *     funcdef    -> '(' 'fn' symbol expr ')'
+ *     funccall   -> '(' ( symbol | funcdef) expr ')'
+ *     literal    -> boolean | integer
+ *     variable   -> symbol
+ *
+ * Example Usage:
+ *   $ ocamlopt types.ml str.cmxa inference.ml parser.ml expr_parser.ml && ./a.out
+ *   repl> true
+ *   tokens: [ "true" ]
+ *   ast: ValueLiteral (LiteralBool true)
+ *   Boolean
+ *   repl>
+ *
+ ******************************************************************************)
+
+open Parser
+open Inference
+
+let to_array (q : 'a Queue.t) : 'a array =
+  let result = Array.make (Queue.length q) "" in
+  let i = ref 0 in
+  Queue.iter
+    (fun x ->
+      result.(!i) <- x;
+      i := !i + 1)
+    q;
+  result
+
+type literal = LiteralBool of bool | LiteralInt of int
+
+let ( let* ) = Option.bind
+let map = Option.map
+
+let tokenize (x : string) : token array =
+  let q = Queue.create () in
+  let i = ref 0 in
+  while !i < String.length x do
+    match x.[!i] with
+    | ' ' -> i := !i + 1
+    | '(' ->
+        Queue.push "(" q;
+        i := !i + 1
+    | ')' ->
+        Queue.push ")" q;
+        i := !i + 1
+    | _ ->
+        let token = ref "" in
+        while !i < String.length x && not (String.contains "() " x.[!i]) do
+          token := !token ^ String.make 1 x.[!i];
+          i := !i + 1
+        done;
+        Queue.push !token q
+  done;
+  to_array q
+
+let parse_symbol (p : parser) : string option =
+  let* x = p#curr in
+  if Str.string_match (Str.regexp "[-a-z][0-9]*") x 0 then
+    begin
+      p#advance;
+      Some x
+    end
+  else
+    None
+
+let parse_variable (p : parser) : Types.value option =
+  let* x = parse_symbol p in
+  Some (Types.ValueVariable x)
+
+let parse_literal (p : parser) : Types.value option =
+  match p#curr with
+  | Some "true" ->
+     p#advance;
+     Some (ValueLiteral (LiteralBool true))
+  | Some "false" ->
+     p#advance;
+     Some (ValueLiteral (LiteralBool false))
+  | Some x ->
+     (match int_of_string_opt x with
+      | Some n ->
+         p#advance;
+         Some (ValueLiteral (LiteralInt n))
+      | _ -> parse_variable p)
+  | _ -> None
+
+let rec parse_expression (p : parser) : Types.value option =
+  parse_binding p
+
+and parse_funccall (p : parser) : Types.value option =
+  match (p#curr, p#next) with
+  | (Some "(", Some "(") ->
+     p#advance;
+     let* f = parse_funcdef p in
+     let* x = parse_expression p in
+     p#expect ")";
+     Some (Types.ValueApplication (f, x))
+  | (Some "(", _) ->
+     p#advance;
+     let* f = parse_symbol p in
+     let* x = parse_expression p in
+     p#expect ")";
+     Some (Types.ValueVarApplication (f, x))
+  | _ -> parse_literal p
+
+and parse_funcdef (p : parser) : Types.value option =
+  match (p#curr, p#next) with
+  | (Some "(", Some "fn") ->
+     p#advance;
+     p#advance;
+     let* name = parse_symbol p in
+     let* body = parse_expression p in
+     p#expect ")";
+     Some (Types.ValueFunction (name, body))
+  | _ -> parse_funccall p
+
+and parse_binding (p : parser) : Types.value option =
+  match (p#curr, p#next) with
+  | (Some "(", Some "let") ->
+     p#advance;
+     p#advance;
+     let* name = parse_symbol p in
+     let* value = parse_expression p in
+     let* body = parse_expression p in
+     Some (Types.ValueBinder (name, value, body))
+  | _ -> parse_funcdef p
+
+let print_tokens (xs : string array) =
+  xs |> Array.to_list
+  |> List.map (Printf.sprintf "\"%s\"")
+  |> String.concat ", "
+  |> Printf.sprintf "tokens: [ %s ]"
+  |> print_string |> print_newline
+
+let parse_language (x : string) : Types.value option =
+  let tokens = tokenize x in
+  print_tokens tokens;
+  parse_expression (new parser tokens)
+
+let rec debug (ast : Types.value) : string =
+  match ast with
+  | ValueLiteral (LiteralBool x) ->
+     Printf.sprintf "ValueLiteral (LiteralBool %s)" (string_of_bool x)
+  | ValueLiteral (LiteralInt x) ->
+     Printf.sprintf "ValueLiteral (LiteralInt %s)" (string_of_int x)
+  | ValueVariable x ->
+     Printf.sprintf "ValueVariable %s" x
+  | ValueFunction (x, body) ->
+     Printf.sprintf "ValueFunction (%s, %s)" x (debug body)
+  | ValueApplication (f, x) ->
+     Printf.sprintf "ValueApplication (%s, %s)" (debug f) (debug x)
+  | ValueVarApplication (f, x) ->
+     Printf.sprintf "ValueVarApplication (%s, %s)" f (debug x)
+  | ValueBinder (k, v, x) ->
+      Printf.sprintf "ValueBinder (%s, %s, %s)" k (debug v) (debug x)
+
+let debug_ast (ast : Types.value) : Types.value =
+  ast |> debug |> Printf.sprintf "ast: %s" |> print_string |> print_newline;
+  ast
+
+let main =
+  while true do
+    begin
+      print_string "repl> ";
+      let x = read_line () in
+      match parse_language x with
+      | Some ast ->
+         (match ast |> debug_ast |> do_infer with
+          | None ->
+             "Type-check failed"
+             |> print_string
+             |> print_newline
+          | Some x ->
+             x
+             |> Types.pretty
+             |> print_string
+             |> print_newline)
+      | None ->
+         "Could not parse"
+         |> print_string
+         |> print_newline
+    end
+  done