From 80f8ede0bbc9799d5199707e1e1ad8e80e4ca7ac Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sun, 7 Mar 2021 15:29:59 -0500 Subject: Initial commit --- .envrc | 1 + .gitignore | 1 + Cargo.lock | 789 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 18 ++ ach/.gitignore | 2 + ach/Makefile | 15 + ach/functions.ach | 3 + ach/simple.ach | 1 + shell.nix | 20 ++ src/ast/mod.rs | 166 ++++++++++ src/codegen/llvm.rs | 282 +++++++++++++++++ src/codegen/mod.rs | 26 ++ src/commands/compile.rs | 28 ++ src/commands/eval.rs | 30 ++ src/commands/mod.rs | 5 + src/common/env.rs | 40 +++ src/common/error.rs | 50 +++ src/common/mod.rs | 4 + src/compiler.rs | 84 +++++ src/interpreter/error.rs | 16 + src/interpreter/mod.rs | 125 ++++++++ src/interpreter/value.rs | 134 ++++++++ src/main.rs | 31 ++ src/parser/mod.rs | 445 ++++++++++++++++++++++++++ 24 files changed, 2316 insertions(+) create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 ach/.gitignore create mode 100644 ach/Makefile create mode 100644 ach/functions.ach create mode 100644 ach/simple.ach create mode 100644 shell.nix create mode 100644 src/ast/mod.rs create mode 100644 src/codegen/llvm.rs create mode 100644 src/codegen/mod.rs create mode 100644 src/commands/compile.rs create mode 100644 src/commands/eval.rs create mode 100644 src/commands/mod.rs create mode 100644 src/common/env.rs create mode 100644 src/common/error.rs create mode 100644 src/common/mod.rs create mode 100644 src/compiler.rs create mode 100644 src/interpreter/error.rs create mode 100644 src/interpreter/mod.rs create mode 100644 src/interpreter/value.rs create mode 100644 src/main.rs create mode 100644 src/parser/mod.rs diff --git a/.envrc b/.envrc new file mode 100644 index 000000000000..051d09d292a8 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +eval "$(lorri direnv)" diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000000..ea8c4bf7f35f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000000..485e9f4cdfcb --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,789 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "achilles" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "derive_more", + "inkwell", + "llvm-sys", + "nom", + "nom-trace", + "pratt", + "proptest", + "test-strategy", + "thiserror", +] + +[[package]] +name = "aho-corasick" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1" + +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "byteorder" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" + +[[package]] +name = "cc" +version = "1.0.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c69b077ad434294d3ce9f1f6143a2a4b89a8a2d54ef813d85003a4fd1137fd" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "3.0.0-beta.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd1061998a501ee7d4b6d449020df3266ca3124b941ec56cf2005c3779ca142" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "indexmap", + "lazy_static", + "os_str_bytes", + "strsim", + "termcolor", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "clap_derive" +version = "3.0.0-beta.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "370f715b81112975b1b69db93e0b56ea4cd4e5002ac43b2da8474106a54096a1" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_more" +version = "0.99.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41cb0e6161ad61ed084a36ba71fbba9e3ac5aee3606fb607fe08da6acbcf3d8c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + +[[package]] +name = "getrandom" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9495705279e7140bf035dde1f6e750c162df8b625267cd52cc44e0b156732c8" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" + +[[package]] +name = "heck" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cbf45460356b7deeb5e3415b5563308c0a9b057c85e12b06ad551f98d0a6ac" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "inkwell" +version = "0.1.0" +source = "git+https://github.com/TheDan64/inkwell?branch=master#a2db15b0bd1c06d71763585ae10d9ea4e775da0c" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "parking_lot", + "regex", +] + +[[package]] +name = "inkwell_internals" +version = "0.3.0" +source = "git+https://github.com/TheDan64/inkwell?branch=master#a2db15b0bd1c06d71763585ae10d9ea4e775da0c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "instant" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21f866863575d0e1d654fbeeabdc927292fdf862873dc3c96c6f753357e13374" +dependencies = [ + "arrayvec", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.88" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b07a082330a35e43f63177cc01689da34fbffa0105e1246cf0311472cac73a" + +[[package]] +name = "llvm-sys" +version = "110.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21ede189444b8c78907e5d36da5dabcf153170fcff9c1dba48afc4b33c7e19f0" +dependencies = [ + "cc", + "lazy_static", + "libc", + "regex", + "semver", +] + +[[package]] +name = "lock_api" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "lexical-core", + "memchr", + "version_check", +] + +[[package]] +name = "nom-trace" +version = "0.2.1" +source = "git+https://github.com/glittershark/nom-trace?branch=nom-6#6168d2e15cc51efd12d80260159b76a764dba138" +dependencies = [ + "nom", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" + +[[package]] +name = "os_str_bytes" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afb2e1c3ee07430c2cf76151675e583e0f19985fa6efae47d6848a3e2c824f85" + +[[package]] +name = "parking_lot" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7a782938e745763fe6907fc6ba86946d72f49fe7e21de074e08128a99fb018" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + +[[package]] +name = "pratt" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e31bbc12f7936a7b195790dd6d9b982b66c54f45ff6766decf25c44cac302dce" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "proptest" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5" +dependencies = [ + "bit-set", + "bitflags", + "byteorder", + "lazy_static", + "num-traits", + "quick-error 2.0.0", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quick-error" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ac73b1112776fc109b2e61909bc46c7e1bf0d7f690ffb1676553acce16d5cda" + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + +[[package]] +name = "rand" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ef9e7e66b4468674bfcb0c81af8b7fa0bb154fa9f28eb840da5c447baeb8d7e" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12735cf05c9e10bf21534da50a147b924d555dc7a547c42e6bb2d5b6017ae0d" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34cf66eb183df1c5876e2dcf6b13d57340741e8dc255b48e40a26de954d06ae7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3190ef7066a446f2e7f42e239d161e905420ccab01eb967c9eb27d21b2322a73" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94341e4e44e24f6b591b59e47a8a027df12e008d73fd5672dbea9cc22f4507d9" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error 1.2.3", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed22b90a0e734a23a7610f4283ac9e5acfb96cbb30dfefa540d66f866f1c09c5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "test-strategy" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2328963c69243416e811c88066d18f670792b2e36e17fa57f4b1a124f85d18a8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "textwrap" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "203008d98caf094106cfaba70acfed15e18ed3ddb7d94e49baec153a2b462789" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0f4a65597094d4483ddaed134f409b2cb7c1beccf25201a9f73c719254fa98e" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7765189610d8241a44529806d6fd1f2e0a08734313a35d5b3a556f92b381f3c0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" +dependencies = [ + "once_cell", +] + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + +[[package]] +name = "unicode-segmentation" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0d2e7be6ae3a5fa87eed5fb451aff96f2573d2694942e40543ae0bbe19c796" + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" + +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000000..eda15e554661 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "achilles" +version = "0.1.0" +authors = ["Griffin Smith "] +edition = "2018" + +[dependencies] +anyhow = "1.0.38" +clap = "3.0.0-beta.2" +derive_more = "0.99.11" +inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] } +llvm-sys = "110.0.1" +nom = "6.1.2" +nom-trace = { git = "https://github.com/glittershark/nom-trace", branch = "nom-6" } +pratt = "0.3.0" +proptest = "1.0.0" +test-strategy = "0.1.1" +thiserror = "1.0.24" diff --git a/ach/.gitignore b/ach/.gitignore new file mode 100644 index 000000000000..e8423ae351b8 --- /dev/null +++ b/ach/.gitignore @@ -0,0 +1,2 @@ +*.ll +*.o diff --git a/ach/Makefile b/ach/Makefile new file mode 100644 index 000000000000..869a0d0f8a3e --- /dev/null +++ b/ach/Makefile @@ -0,0 +1,15 @@ +default: simple + +%.ll: %.ach + cargo run -- compile $< -o $@ -f llvm + +%.o: %.ll + llc $< -o $@ -filetype=obj + +%: %.o + clang $< -o $@ + +.PHONY: clean + +clean: + @rm -f *.ll *.o simple diff --git a/ach/functions.ach b/ach/functions.ach new file mode 100644 index 000000000000..8d91564c1559 --- /dev/null +++ b/ach/functions.ach @@ -0,0 +1,3 @@ +fn id x = x +fn plus x y = x + y +fn main = plus (id 2) 7 diff --git a/ach/simple.ach b/ach/simple.ach new file mode 100644 index 000000000000..20f1677235c0 --- /dev/null +++ b/ach/simple.ach @@ -0,0 +1 @@ +fn main = let x = 2; y = 3 in x + y diff --git a/shell.nix b/shell.nix new file mode 100644 index 000000000000..cdf74db415ca --- /dev/null +++ b/shell.nix @@ -0,0 +1,20 @@ +with import (builtins.fetchTarball { + url = "https://github.com/nixos/nixpkgs/archive/93a812bb9f9c398bd5b9636ab3674dcfe8cfb884.tar.gz"; + sha256 = "14zzsgnigd7vjbrpzm1s4qsknm73sci38ss00x96wamz6psaxyah"; +}) {}; + +mkShell { + buildInputs = [ + clang_11 + llvm_11.lib + llvmPackages_11.bintools + llvmPackages_11.clang + llvmPackages_11.libclang.lib + zlib + ncurses + libxml2 + libffi + pkg-config + ]; + +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs new file mode 100644 index 000000000000..2dcf955fe67c --- /dev/null +++ b/src/ast/mod.rs @@ -0,0 +1,166 @@ +use std::borrow::Cow; +use std::convert::TryFrom; +use std::fmt::{self, Display, Formatter}; + +#[derive(Debug, PartialEq, Eq)] +pub struct InvalidIdentifier<'a>(Cow<'a, str>); + +#[derive(Debug, PartialEq, Eq, Hash)] +pub struct Ident<'a>(pub Cow<'a, str>); + +impl<'a> From<&'a Ident<'a>> for &'a str { + fn from(id: &'a Ident<'a>) -> Self { + id.0.as_ref() + } +} + +impl<'a> Display for Ident<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl<'a> Ident<'a> { + pub fn to_owned(&self) -> Ident<'static> { + Ident(Cow::Owned(self.0.clone().into_owned())) + } + + pub fn from_str_unchecked(s: &'a str) -> Self { + debug_assert!(is_valid_identifier(s)); + Self(Cow::Borrowed(s)) + } + + pub fn from_string_unchecked(s: String) -> Self { + debug_assert!(is_valid_identifier(&s)); + Self(Cow::Owned(s)) + } +} + +pub fn is_valid_identifier(s: &S) -> bool +where + S: AsRef + ?Sized, +{ + s.as_ref() + .chars() + .any(|c| !c.is_alphanumeric() || !"_".contains(c)) +} + +impl<'a> TryFrom<&'a str> for Ident<'a> { + type Error = InvalidIdentifier<'a>; + + fn try_from(s: &'a str) -> Result { + if is_valid_identifier(s) { + Ok(Ident(Cow::Borrowed(s))) + } else { + Err(InvalidIdentifier(Cow::Borrowed(s))) + } + } +} + +impl<'a> TryFrom for Ident<'a> { + type Error = InvalidIdentifier<'static>; + + fn try_from(s: String) -> Result { + if is_valid_identifier(&s) { + Ok(Ident(Cow::Owned(s))) + } else { + Err(InvalidIdentifier(Cow::Owned(s))) + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum BinaryOperator { + /// `+` + Add, + + /// `-` + Sub, + + /// `*` + Mul, + + /// `/` + Div, + + /// `^` + Pow, + + /// `==` + Equ, + + /// `!=` + Neq, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum UnaryOperator { + /// ! + Not, + + /// - + Neg, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Literal { + Int(u64), +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Expr<'a> { + Ident(Ident<'a>), + + Literal(Literal), + + UnaryOp { + op: UnaryOperator, + rhs: Box>, + }, + + BinaryOp { + lhs: Box>, + op: BinaryOperator, + rhs: Box>, + }, + + Let { + bindings: Vec<(Ident<'a>, Expr<'a>)>, + body: Box>, + }, + + If { + condition: Box>, + then: Box>, + else_: Box>, + }, +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Fun<'a> { + pub name: Ident<'a>, + pub args: Vec>, + pub body: Expr<'a>, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum Decl<'a> { + Fun(Fun<'a>), +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Type { + Int, + Float, + Bool, +} + +impl Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Int => f.write_str("int"), + Self::Float => f.write_str("float"), + Self::Bool => f.write_str("bool"), + } + } +} diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs new file mode 100644 index 000000000000..ff92c3373176 --- /dev/null +++ b/src/codegen/llvm.rs @@ -0,0 +1,282 @@ +use std::path::Path; +use std::result; + +use inkwell::basic_block::BasicBlock; +use inkwell::builder::Builder; +pub use inkwell::context::Context; +use inkwell::module::Module; +use inkwell::support::LLVMString; +use inkwell::types::FunctionType; +use inkwell::values::{BasicValueEnum, FunctionValue}; +use inkwell::IntPredicate; +use thiserror::Error; + +use crate::ast::{BinaryOperator, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::common::env::Env; + +#[derive(Debug, PartialEq, Eq, Error)] +pub enum Error { + #[error("Undefined variable {0}")] + UndefinedVariable(Ident<'static>), + + #[error("LLVM Error: {0}")] + LLVMError(String), +} + +impl From for Error { + fn from(s: LLVMString) -> Self { + Self::LLVMError(s.to_string()) + } +} + +pub type Result = result::Result; + +pub struct Codegen<'ctx, 'ast> { + context: &'ctx Context, + pub module: Module<'ctx>, + builder: Builder<'ctx>, + env: Env<'ast, BasicValueEnum<'ctx>>, + function: Option>, +} + +impl<'ctx, 'ast> Codegen<'ctx, 'ast> { + pub fn new(context: &'ctx Context, module_name: &str) -> Self { + let module = context.create_module(module_name); + let builder = context.create_builder(); + Self { + context, + module, + builder, + env: Default::default(), + function: None, + } + } + + pub fn new_function<'a>( + &'a mut self, + name: &str, + ty: FunctionType<'ctx>, + ) -> &'a FunctionValue<'ctx> { + self.function = Some(self.module.add_function(name, ty, None)); + let basic_block = self.append_basic_block("entry"); + self.builder.position_at_end(basic_block); + self.function.as_ref().unwrap() + } + + pub fn finish_function(&self, res: &BasicValueEnum<'ctx>) { + self.builder.build_return(Some(res)); + } + + pub fn append_basic_block(&self, name: &str) -> BasicBlock<'ctx> { + self.context + .append_basic_block(self.function.unwrap(), name) + } + + pub fn codegen_expr(&mut self, expr: &'ast Expr<'ast>) -> Result> { + match expr { + Expr::Ident(id) => self + .env + .resolve(id) + .cloned() + .ok_or_else(|| Error::UndefinedVariable(id.to_owned())), + Expr::Literal(Literal::Int(i)) => { + let ty = self.context.i64_type(); + Ok(BasicValueEnum::IntValue(ty.const_int(*i, false))) + } + Expr::UnaryOp { op, rhs } => { + let rhs = self.codegen_expr(rhs)?; + match op { + UnaryOperator::Not => unimplemented!(), + UnaryOperator::Neg => Ok(BasicValueEnum::IntValue( + self.builder.build_int_neg(rhs.into_int_value(), "neg"), + )), + } + } + Expr::BinaryOp { lhs, op, rhs } => { + let lhs = self.codegen_expr(lhs)?; + let rhs = self.codegen_expr(rhs)?; + match op { + BinaryOperator::Add => { + Ok(BasicValueEnum::IntValue(self.builder.build_int_add( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))) + } + BinaryOperator::Sub => { + Ok(BasicValueEnum::IntValue(self.builder.build_int_sub( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))) + } + BinaryOperator::Mul => { + Ok(BasicValueEnum::IntValue(self.builder.build_int_sub( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))) + } + BinaryOperator::Div => { + Ok(BasicValueEnum::IntValue(self.builder.build_int_signed_div( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))) + } + BinaryOperator::Pow => unimplemented!(), + BinaryOperator::Equ => { + Ok(BasicValueEnum::IntValue(self.builder.build_int_compare( + IntPredicate::EQ, + lhs.into_int_value(), + rhs.into_int_value(), + "eq", + ))) + } + BinaryOperator::Neq => todo!(), + } + } + Expr::Let { bindings, body } => { + self.env.push(); + for (id, val) in bindings { + let val = self.codegen_expr(val)?; + self.env.set(id, val); + } + let res = self.codegen_expr(body); + self.env.pop(); + res + } + Expr::If { + condition, + then, + else_, + } => { + let then_block = self.append_basic_block("then"); + let else_block = self.append_basic_block("else"); + let join_block = self.append_basic_block("join"); + let condition = self.codegen_expr(condition)?; + self.builder.build_conditional_branch( + condition.into_int_value(), + then_block, + else_block, + ); + self.builder.position_at_end(then_block); + let then_res = self.codegen_expr(then)?; + self.builder.build_unconditional_branch(join_block); + + self.builder.position_at_end(else_block); + let else_res = self.codegen_expr(else_)?; + self.builder.build_unconditional_branch(join_block); + + self.builder.position_at_end(join_block); + let phi = self.builder.build_phi(self.context.i64_type(), "join"); + phi.add_incoming(&[(&then_res, then_block), (&else_res, else_block)]); + Ok(phi.as_basic_value()) + } + } + } + + pub fn codegen_decl(&mut self, decl: &'ast Decl<'ast>) -> Result<()> { + match decl { + Decl::Fun(Fun { name, args, body }) => { + let i64_type = self.context.i64_type(); + self.new_function( + name.into(), + i64_type.fn_type( + args.iter() + .map(|_| i64_type.into()) + .collect::>() + .as_slice(), + false, + ), + ); + self.env.push(); + for (i, arg) in args.iter().enumerate() { + self.env + .set(arg, self.function.unwrap().get_nth_param(i as u32).unwrap()); + } + let res = self.codegen_expr(body)?; + self.env.pop(); + self.finish_function(&res); + Ok(()) + } + } + } + + pub fn codegen_main(&mut self, expr: &'ast Expr<'ast>) -> Result<()> { + self.new_function("main", self.context.i64_type().fn_type(&[], false)); + let res = self.codegen_expr(expr)?; + self.finish_function(&res); + Ok(()) + } + + pub fn print_to_file

(&self, path: P) -> Result<()> + where + P: AsRef, + { + Ok(self.module.print_to_file(path)?) + } + + pub fn binary_to_file

(&self, path: P) -> Result<()> + where + P: AsRef, + { + if self.module.write_bitcode_to_path(path.as_ref()) { + Ok(()) + } else { + Err(Error::LLVMError( + "Error writing bitcode to output path".to_owned(), + )) + } + } +} + +#[cfg(test)] +mod tests { + use inkwell::execution_engine::JitFunction; + use inkwell::OptimizationLevel; + + use super::*; + + fn jit_eval(expr: &str) -> anyhow::Result { + let expr = crate::parser::expr(expr).unwrap().1; + + let context = Context::create(); + let mut codegen = Codegen::new(&context, "test"); + let execution_engine = codegen + .module + .create_jit_execution_engine(OptimizationLevel::None) + .unwrap(); + + codegen.new_function("test", context.i64_type().fn_type(&[], false)); + let res = codegen.codegen_expr(&expr)?; + codegen.finish_function(&res); + + unsafe { + let fun: JitFunction T> = + execution_engine.get_function("test")?; + Ok(fun.call()) + } + } + + #[test] + fn add_literals() { + assert_eq!(jit_eval::("1 + 2").unwrap(), 3); + } + + #[test] + fn variable_shadowing() { + assert_eq!( + jit_eval::("let x = 1 in (let x = 2 in x) + x").unwrap(), + 3 + ); + } + + #[test] + fn eq() { + assert_eq!( + jit_eval::("let x = 1 in if x == 1 then 2 else 4").unwrap(), + 2 + ); + } +} diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs new file mode 100644 index 000000000000..4620b6b48e84 --- /dev/null +++ b/src/codegen/mod.rs @@ -0,0 +1,26 @@ +pub mod llvm; + +use inkwell::execution_engine::JitFunction; +use inkwell::OptimizationLevel; +pub use llvm::*; + +use crate::ast::Expr; +use crate::common::Result; + +pub fn jit_eval(expr: &Expr) -> Result { + let context = Context::create(); + let mut codegen = Codegen::new(&context, "eval"); + let execution_engine = codegen + .module + .create_jit_execution_engine(OptimizationLevel::None) + .map_err(Error::from)?; + codegen.new_function("eval", context.i64_type().fn_type(&[], false)); + let res = codegen.codegen_expr(&expr)?; + codegen.finish_function(&res); + + unsafe { + let fun: JitFunction T> = + execution_engine.get_function("eval").unwrap(); + Ok(fun.call()) + } +} diff --git a/src/commands/compile.rs b/src/commands/compile.rs new file mode 100644 index 000000000000..e16b8c87a659 --- /dev/null +++ b/src/commands/compile.rs @@ -0,0 +1,28 @@ +use std::path::PathBuf; + +use clap::Clap; + +use crate::common::Result; +use crate::compiler::{self, CompilerOptions}; + +#[derive(Clap)] +pub struct Compile { + file: PathBuf, + + #[clap(short = 'o')] + out_file: PathBuf, + + #[clap(flatten)] + options: CompilerOptions, +} + +impl Compile { + pub fn run(self) -> Result<()> { + eprintln!( + ">>> {} -> {}", + &self.file.to_string_lossy(), + self.out_file.to_string_lossy() + ); + compiler::compile_file(&self.file, &self.out_file, &self.options) + } +} diff --git a/src/commands/eval.rs b/src/commands/eval.rs new file mode 100644 index 000000000000..112bee64625b --- /dev/null +++ b/src/commands/eval.rs @@ -0,0 +1,30 @@ +use clap::Clap; + +use crate::codegen; +use crate::interpreter; +use crate::parser; +use crate::Result; + +/// Evaluate an expression and print its result +#[derive(Clap)] +pub struct Eval { + /// JIT-compile with LLVM instead of interpreting + #[clap(long)] + jit: bool, + + /// Expression to evaluate + expr: String, +} + +impl Eval { + pub fn run(self) -> Result<()> { + let (_, parsed) = parser::expr(&self.expr)?; + let result = if self.jit { + codegen::jit_eval::(&parsed)?.into() + } else { + interpreter::eval(&parsed)? + }; + println!("{}", result); + Ok(()) + } +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs new file mode 100644 index 000000000000..9c0038dabfb1 --- /dev/null +++ b/src/commands/mod.rs @@ -0,0 +1,5 @@ +pub mod compile; +pub mod eval; + +pub use compile::Compile; +pub use eval::Eval; diff --git a/src/common/env.rs b/src/common/env.rs new file mode 100644 index 000000000000..8b5cde49e9e4 --- /dev/null +++ b/src/common/env.rs @@ -0,0 +1,40 @@ +use std::collections::HashMap; + +use crate::ast::Ident; + +/// A lexical environment +#[derive(Debug, PartialEq, Eq)] +pub struct Env<'ast, V>(Vec, V>>); + +impl<'ast, V> Default for Env<'ast, V> { + fn default() -> Self { + Self::new() + } +} + +impl<'ast, V> Env<'ast, V> { + pub fn new() -> Self { + Self(vec![Default::default()]) + } + + pub fn push(&mut self) { + self.0.push(Default::default()); + } + + pub fn pop(&mut self) { + self.0.pop(); + } + + pub fn set(&mut self, k: &'ast Ident<'ast>, v: V) { + self.0.last_mut().unwrap().insert(k, v); + } + + pub fn resolve<'a>(&'a self, k: &'ast Ident<'ast>) -> Option<&'a V> { + for ctx in self.0.iter().rev() { + if let Some(res) = ctx.get(k) { + return Some(res); + } + } + None + } +} diff --git a/src/common/error.rs b/src/common/error.rs new file mode 100644 index 000000000000..f3f3023ceaf8 --- /dev/null +++ b/src/common/error.rs @@ -0,0 +1,50 @@ +use std::{io, result}; + +use thiserror::Error; + +use crate::{codegen, interpreter, parser}; + +#[derive(Error, Debug)] +pub enum Error { + #[error(transparent)] + IOError(#[from] io::Error), + + #[error("Error parsing input: {0}")] + ParseError(#[from] parser::Error), + + #[error("Error evaluating expression: {0}")] + EvalError(#[from] interpreter::Error), + + #[error("Compile error: {0}")] + CodegenError(#[from] codegen::Error), + + #[error("{0}")] + Message(String), +} + +impl From for Error { + fn from(s: String) -> Self { + Self::Message(s) + } +} + +impl<'a> From>> for Error { + fn from(e: nom::Err>) -> Self { + use nom::error::Error as NomError; + use nom::Err::*; + + Self::ParseError(match e { + Incomplete(i) => Incomplete(i), + Error(NomError { input, code }) => Error(NomError { + input: input.to_owned(), + code, + }), + Failure(NomError { input, code }) => Failure(NomError { + input: input.to_owned(), + code, + }), + }) + } +} + +pub type Result = result::Result; diff --git a/src/common/mod.rs b/src/common/mod.rs new file mode 100644 index 000000000000..af5974a116fb --- /dev/null +++ b/src/common/mod.rs @@ -0,0 +1,4 @@ +pub(crate) mod env; +pub(crate) mod error; + +pub use error::{Error, Result}; diff --git a/src/compiler.rs b/src/compiler.rs new file mode 100644 index 000000000000..5f8e1ef4fa03 --- /dev/null +++ b/src/compiler.rs @@ -0,0 +1,84 @@ +use std::fmt::{self, Display}; +use std::path::Path; +use std::str::FromStr; +use std::{fs, result}; + +use clap::Clap; +use test_strategy::Arbitrary; + +use crate::codegen::{self, Codegen}; +use crate::common::Result; +use crate::parser; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Arbitrary)] +pub enum OutputFormat { + LLVM, + Bitcode, +} + +impl Default for OutputFormat { + fn default() -> Self { + Self::Bitcode + } +} + +impl FromStr for OutputFormat { + type Err = String; + + fn from_str(s: &str) -> result::Result { + match s { + "llvm" => Ok(Self::LLVM), + "binary" => Ok(Self::Bitcode), + _ => Err(format!( + "Invalid output format {}, expected one of {{llvm, binary}}", + s + )), + } + } +} + +impl Display for OutputFormat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OutputFormat::LLVM => f.write_str("llvm"), + OutputFormat::Bitcode => f.write_str("binary"), + } + } +} + +#[derive(Clap, Debug, PartialEq, Eq, Default)] +pub struct CompilerOptions { + #[clap(long, short = 'f', default_value)] + format: OutputFormat, +} + +pub fn compile_file(input: &Path, output: &Path, options: &CompilerOptions) -> Result<()> { + let src = fs::read_to_string(input)?; + let (_, decls) = parser::toplevel(&src)?; // TODO: statements + let context = codegen::Context::create(); + let mut codegen = Codegen::new( + &context, + &input + .file_stem() + .map_or("UNKNOWN".to_owned(), |s| s.to_string_lossy().into_owned()), + ); + for decl in &decls { + codegen.codegen_decl(decl)?; + } + match options.format { + OutputFormat::LLVM => codegen.print_to_file(output)?, + OutputFormat::Bitcode => codegen.binary_to_file(output)?, + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use test_strategy::proptest; + + #[proptest] + fn output_format_display_from_str_round_trip(of: OutputFormat) { + assert_eq!(OutputFormat::from_str(&of.to_string()), Ok(of)); + } +} diff --git a/src/interpreter/error.rs b/src/interpreter/error.rs new file mode 100644 index 000000000000..e0299d180553 --- /dev/null +++ b/src/interpreter/error.rs @@ -0,0 +1,16 @@ +use std::result; + +use thiserror::Error; + +use crate::ast::{Ident, Type}; + +#[derive(Debug, PartialEq, Eq, Error)] +pub enum Error { + #[error("Undefined variable {0}")] + UndefinedVariable(Ident<'static>), + + #[error("Unexpected type {actual}, expected type {expected}")] + InvalidType { actual: Type, expected: Type }, +} + +pub type Result = result::Result; diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs new file mode 100644 index 000000000000..adff3568c2c3 --- /dev/null +++ b/src/interpreter/mod.rs @@ -0,0 +1,125 @@ +mod error; +mod value; + +pub use self::error::{Error, Result}; +pub use self::value::Value; +use crate::ast::{BinaryOperator, Expr, Ident, Literal, UnaryOperator}; +use crate::common::env::Env; + +#[derive(Debug, Default)] +pub struct Interpreter<'a> { + env: Env<'a, Value>, +} + +impl<'a> Interpreter<'a> { + pub fn new() -> Self { + Self::default() + } + + fn resolve(&self, var: &'a Ident<'a>) -> Result { + self.env + .resolve(var) + .cloned() + .ok_or_else(|| Error::UndefinedVariable(var.to_owned())) + } + + pub fn eval(&mut self, expr: &'a Expr<'a>) -> Result { + match expr { + Expr::Ident(id) => self.resolve(id), + Expr::Literal(Literal::Int(i)) => Ok((*i).into()), + Expr::UnaryOp { op, rhs } => { + let rhs = self.eval(rhs)?; + match op { + UnaryOperator::Neg => -rhs, + _ => unimplemented!(), + } + } + Expr::BinaryOp { lhs, op, rhs } => { + let lhs = self.eval(lhs)?; + let rhs = self.eval(rhs)?; + match op { + BinaryOperator::Add => lhs + rhs, + BinaryOperator::Sub => lhs - rhs, + BinaryOperator::Mul => lhs * rhs, + BinaryOperator::Div => lhs / rhs, + BinaryOperator::Pow => todo!(), + BinaryOperator::Equ => Ok(lhs.eq(&rhs).into()), + BinaryOperator::Neq => todo!(), + } + } + Expr::Let { bindings, body } => { + self.env.push(); + for (id, val) in bindings { + let val = self.eval(val)?; + self.env.set(id, val); + } + let res = self.eval(body)?; + self.env.pop(); + Ok(res) + } + Expr::If { + condition, + then, + else_, + } => { + let condition = self.eval(condition)?; + if *(condition.into_type::()?) { + self.eval(then) + } else { + self.eval(else_) + } + } + } + } +} + +pub fn eval<'a>(expr: &'a Expr<'a>) -> Result { + let mut interpreter = Interpreter::new(); + interpreter.eval(expr) +} + +#[cfg(test)] +mod tests { + use std::convert::TryFrom; + + use super::value::{TypeOf, Val}; + use super::*; + use BinaryOperator::*; + + fn int_lit(i: u64) -> Box> { + Box::new(Expr::Literal(Literal::Int(i))) + } + + fn parse_eval(src: &str) -> T + where + for<'a> &'a T: TryFrom<&'a Val>, + T: Clone + TypeOf, + { + let expr = crate::parser::expr(src).unwrap().1; + let res = eval(&expr).unwrap(); + res.into_type::().unwrap().clone() + } + + #[test] + fn simple_addition() { + let expr = Expr::BinaryOp { + lhs: int_lit(1), + op: Mul, + rhs: int_lit(2), + }; + let res = eval(&expr).unwrap(); + assert_eq!(*res.into_type::().unwrap(), 2); + } + + #[test] + fn variable_shadowing() { + let res = parse_eval::("let x = 1 in (let x = 2 in x) + x"); + assert_eq!(res, 3); + } + + #[test] + fn conditional_with_equals() { + let res = parse_eval::("let x = 1 in if x == 1 then 2 else 4"); + assert_eq!(res, 2); + } +} diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs new file mode 100644 index 000000000000..69e4d4ffeb96 --- /dev/null +++ b/src/interpreter/value.rs @@ -0,0 +1,134 @@ +use std::convert::TryFrom; +use std::fmt::{self, Display}; +use std::ops::{Add, Div, Mul, Neg, Sub}; +use std::rc::Rc; + +use derive_more::{Deref, From, TryInto}; + +use super::{Error, Result}; +use crate::ast::Type; + +#[derive(Debug, PartialEq, From, TryInto)] +#[try_into(owned, ref)] +pub enum Val { + Int(i64), + Float(f64), + Bool(bool), +} + +impl From for Val { + fn from(i: u64) -> Self { + Self::from(i as i64) + } +} + +impl Display for Val { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Val::Int(x) => x.fmt(f), + Val::Float(x) => x.fmt(f), + Val::Bool(x) => x.fmt(f), + } + } +} + +impl Val { + pub fn type_(&self) -> Type { + match self { + Val::Int(_) => Type::Int, + Val::Float(_) => Type::Float, + Val::Bool(_) => Type::Bool, + } + } + + pub fn into_type<'a, T>(&'a self) -> Result<&'a T> + where + T: TypeOf + 'a + Clone, + &'a T: TryFrom<&'a Self>, + { + <&T>::try_from(self).map_err(|_| Error::InvalidType { + actual: self.type_(), + expected: ::type_of(), + }) + } +} + +#[derive(Debug, PartialEq, Clone, Deref)] +pub struct Value(Rc); + +impl Display for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl From for Value +where + Val: From, +{ + fn from(x: T) -> Self { + Self(Rc::new(x.into())) + } +} + +impl Neg for Value { + type Output = Result; + + fn neg(self) -> Self::Output { + Ok((-self.into_type::()?).into()) + } +} + +impl Add for Value { + type Output = Result; + + fn add(self, rhs: Self) -> Self::Output { + Ok((self.into_type::()? + rhs.into_type::()?).into()) + } +} + +impl Sub for Value { + type Output = Result; + + fn sub(self, rhs: Self) -> Self::Output { + Ok((self.into_type::()? - rhs.into_type::()?).into()) + } +} + +impl Mul for Value { + type Output = Result; + + fn mul(self, rhs: Self) -> Self::Output { + Ok((self.into_type::()? * rhs.into_type::()?).into()) + } +} + +impl Div for Value { + type Output = Result; + + fn div(self, rhs: Self) -> Self::Output { + Ok((self.into_type::()? / rhs.into_type::()?).into()) + } +} + +pub trait TypeOf { + fn type_of() -> Type; +} + +impl TypeOf for i64 { + fn type_of() -> Type { + Type::Int + } +} + +impl TypeOf for bool { + fn type_of() -> Type { + Type::Bool + } +} + +impl TypeOf for f64 { + fn type_of() -> Type { + Type::Float + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 000000000000..c31fda32dbc4 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,31 @@ +use clap::Clap; + +pub mod ast; +pub mod codegen; +pub(crate) mod commands; +pub(crate) mod common; +pub mod compiler; +pub mod interpreter; +pub mod parser; + +pub use common::{Error, Result}; + +#[derive(Clap)] +struct Opts { + #[clap(subcommand)] + subcommand: Command, +} + +#[derive(Clap)] +enum Command { + Eval(commands::Eval), + Compile(commands::Compile), +} + +fn main() -> anyhow::Result<()> { + let opts = Opts::parse(); + match opts.subcommand { + Command::Eval(eval) => Ok(eval.run()?), + Command::Compile(compile) => Ok(compile.run()?), + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 000000000000..811450da0d61 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,445 @@ +use nom::character::complete::{digit1, multispace0, multispace1}; +use nom::error::{ErrorKind, ParseError}; +use nom::{ + alt, char, complete, delimited, do_parse, flat_map, many0, map, named, parse_to, + separated_list0, separated_list1, tag, tuple, +}; +use pratt::{Affix, Associativity, PrattParser, Precedence}; + +use crate::ast::{BinaryOperator, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; + +pub type Error = nom::Err>; + +#[derive(Debug)] +enum TokenTree<'a> { + Prefix(UnaryOperator), + // Postfix(char), + Infix(BinaryOperator), + Primary(Expr<'a>), + Group(Vec>), +} + +named!(prefix(&str) -> TokenTree, map!(alt!( + complete!(char!('-')) => { |_| UnaryOperator::Neg } | + complete!(char!('!')) => { |_| UnaryOperator::Not } +), TokenTree::Prefix)); + +named!(infix(&str) -> TokenTree, map!(alt!( + complete!(tag!("==")) => { |_| BinaryOperator::Equ } | + complete!(tag!("!=")) => { |_| BinaryOperator::Neq } | + complete!(char!('+')) => { |_| BinaryOperator::Add } | + complete!(char!('-')) => { |_| BinaryOperator::Sub } | + complete!(char!('*')) => { |_| BinaryOperator::Mul } | + complete!(char!('/')) => { |_| BinaryOperator::Div } | + complete!(char!('^')) => { |_| BinaryOperator::Pow } +), TokenTree::Infix)); + +named!(primary(&str) -> TokenTree, alt!( + do_parse!( + multispace0 >> + char!('(') >> + multispace0 >> + group: group >> + multispace0 >> + char!(')') >> + multispace0 >> + (TokenTree::Group(group)) + ) | + delimited!(multispace0, simple_expr, multispace0) => { |s| TokenTree::Primary(s) } +)); + +named!( + rest(&str) -> Vec<(TokenTree, Vec, TokenTree)>, + many0!(tuple!( + infix, + delimited!(multispace0, many0!(prefix), multispace0), + primary + // many0!(postfix) + )) +); + +named!(group(&str) -> Vec, do_parse!( + prefix: many0!(prefix) + >> primary: primary + // >> postfix: many0!(postfix) + >> rest: rest + >> ({ + let mut res = prefix; + res.push(primary); + // res.append(&mut postfix); + for (infix, mut prefix, primary/*, mut postfix*/) in rest { + res.push(infix); + res.append(&mut prefix); + res.push(primary); + // res.append(&mut postfix); + } + res + }) +)); + +fn token_tree(i: &str) -> nom::IResult<&str, Vec> { + group(i) +} + +struct ExprParser; + +impl<'a, I> PrattParser for ExprParser +where + I: Iterator>, +{ + type Error = pratt::NoError; + type Input = TokenTree<'a>; + type Output = Expr<'a>; + + fn query(&mut self, input: &Self::Input) -> Result { + use BinaryOperator::*; + use UnaryOperator::*; + + Ok(match input { + TokenTree::Infix(Add) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Sub) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Mul) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Div) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Pow) => Affix::Infix(Precedence(8), Associativity::Right), + TokenTree::Infix(Equ) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Infix(Neq) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Prefix(Neg) => Affix::Prefix(Precedence(6)), + TokenTree::Prefix(Not) => Affix::Prefix(Precedence(6)), + TokenTree::Primary(_) => Affix::Nilfix, + TokenTree::Group(_) => Affix::Nilfix, + }) + } + + fn primary(&mut self, input: Self::Input) -> Result { + Ok(match input { + TokenTree::Primary(expr) => expr, + TokenTree::Group(group) => self.parse(&mut group.into_iter()).unwrap(), + _ => unreachable!(), + }) + } + + fn infix( + &mut self, + lhs: Self::Output, + op: Self::Input, + rhs: Self::Output, + ) -> Result { + let op = match op { + TokenTree::Infix(op) => op, + _ => unreachable!(), + }; + Ok(Expr::BinaryOp { + lhs: Box::new(lhs), + op, + rhs: Box::new(rhs), + }) + } + + fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result { + let op = match op { + TokenTree::Prefix(op) => op, + _ => unreachable!(), + }; + + Ok(Expr::UnaryOp { + op, + rhs: Box::new(rhs), + }) + } + + fn postfix( + &mut self, + _lhs: Self::Output, + _op: Self::Input, + ) -> Result { + unreachable!() + } +} + +fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> +where + E: ParseError<&'a str>, +{ + let mut chars = i.chars(); + if let Some(f) = chars.next() { + if f.is_alphabetic() || f == '_' { + let mut idx = 1; + for c in chars { + if !(c.is_alphanumeric() || c == '_') { + break; + } + idx += 1; + } + Ok((&i[idx..], Ident::from_str_unchecked(&i[..idx]))) + } else { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Satisfy))) + } + } else { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Eof))) + } +} + +named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); + +named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); + +named!(binding(&str) -> (Ident, Expr), do_parse!( + multispace0 + >> ident: ident + >> multispace0 + >> char!('=') + >> multispace0 + >> expr: expr + >> (ident, expr) +)); + +named!(let_(&str) -> Expr, do_parse!( + tag!("let") + >> multispace0 + >> bindings: separated_list1!(alt!(char!(';') | char!('\n')), binding) + >> multispace0 + >> tag!("in") + >> multispace0 + >> body: expr + >> (Expr::Let { + bindings, + body: Box::new(body) + }) +)); + +named!(if_(&str) -> Expr, do_parse! ( + tag!("if") + >> multispace0 + >> condition: expr + >> multispace0 + >> tag!("then") + >> multispace0 + >> then: expr + >> multispace0 + >> tag!("else") + >> multispace0 + >> else_: expr + >> (Expr::If { + condition: Box::new(condition), + then: Box::new(then), + else_: Box::new(else_) + }) +)); + +named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); + +named!(simple_expr(&str) -> Expr, alt!( + let_ | + if_ | + literal | + ident_expr +)); + +named!(pub expr(&str) -> Expr, alt!( + map!(token_tree, |tt| { + ExprParser.parse(&mut tt.into_iter()).unwrap() + }) | + simple_expr)); + +////// + +named!(fun(&str) -> Fun, do_parse!( + tag!("fn") + >> multispace0 + >> name: ident + >> multispace1 + >> args: separated_list0!(multispace1, ident) + >> multispace0 + >> char!('=') + >> multispace0 + >> body: expr + >> (Fun { + name, + args, + body + }) +)); + +named!(pub decl(&str) -> Decl, alt!( + fun => { |f| Decl::Fun(f) } +)); + +named!(pub toplevel(&str) -> Vec, separated_list0!(multispace1, decl)); + +#[cfg(test)] +mod tests { + use std::convert::{TryFrom, TryInto}; + + use super::*; + use BinaryOperator::*; + use Expr::{BinaryOp, If, Let, UnaryOp}; + use UnaryOperator::*; + + fn ident_expr(s: &str) -> Box { + Box::new(Expr::Ident(Ident::try_from(s).unwrap())) + } + + macro_rules! test_parse { + ($parser: ident, $src: expr) => {{ + let (rem, res) = $parser($src).unwrap(); + assert!( + rem.is_empty(), + "non-empty remainder: \"{}\", parsed: {:?}", + rem, + res + ); + res + }}; + } + + mod operators { + use super::*; + + #[test] + fn mul_plus() { + let (rem, res) = expr("x*y+z").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn mul_plus_ws() { + let (rem, res) = expr("x * y + z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn unary() { + let (rem, res) = expr("x * -z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(UnaryOp { + op: Neg, + rhs: ident_expr("z"), + }) + } + ) + } + + #[test] + fn mul_literal() { + let (rem, res) = expr("x * 3").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(3))), + } + ) + } + + #[test] + fn equ() { + let res = test_parse!(expr, "x * 7 == 7"); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + }), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + } + } + + #[test] + fn let_complex() { + let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); + assert_eq!( + res, + Let { + bindings: vec![ + ( + Ident::try_from("x").unwrap(), + Expr::Literal(Literal::Int(1)) + ), + ( + Ident::try_from("y").unwrap(), + Expr::BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + ], + body: Box::new(Expr::BinaryOp { + lhs: Box::new(Expr::BinaryOp { + lhs: ident_expr("x"), + op: Add, + rhs: ident_expr("y"), + }), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(4))), + }) + } + ) + } + + #[test] + fn if_simple() { + let res = test_parse!(expr, "if x == 8 then 9 else 20"); + assert_eq!( + res, + If { + condition: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(8))), + }), + then: Box::new(Expr::Literal(Literal::Int(9))), + else_: Box::new(Expr::Literal(Literal::Int(20))) + } + ) + } + + #[test] + fn fn_decl() { + let res = test_parse!(decl, "fn id x = x"); + assert_eq!( + res, + Decl::Fun(Fun { + name: "id".try_into().unwrap(), + args: vec!["x".try_into().unwrap()], + body: *ident_expr("x"), + }) + ) + } +} -- cgit 1.4.1 From 1ea2d8ba9fef3bee2c757d19d8b42e541e3fe390 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Mon, 8 Mar 2021 00:04:44 -0500 Subject: Implement functions, both top-level and anonymous Implement both top-level and anonymous functions, but not closures in either case. --- Cargo.lock | 10 +++ Cargo.toml | 1 + src/ast/mod.rs | 94 ++++++++++++++++++++---- src/codegen/llvm.rs | 181 +++++++++++++++++++++++++++++++---------------- src/codegen/mod.rs | 4 +- src/commands/compile.rs | 3 + src/common/env.rs | 9 +++ src/interpreter/mod.rs | 56 ++++++++++++--- src/interpreter/value.rs | 101 ++++++++++++++++++-------- src/parser/mod.rs | 167 +++++++++++++++++++++++++++++++++++++++---- 10 files changed, 501 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 485e9f4cdfcb..d8eaedeca181 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,7 @@ dependencies = [ "clap", "derive_more", "inkwell", + "itertools", "llvm-sys", "nom", "nom-trace", @@ -245,6 +246,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "itertools" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319" +dependencies = [ + "either", +] + [[package]] name = "lazy_static" version = "1.4.0" diff --git a/Cargo.toml b/Cargo.toml index eda15e554661..c9796a821586 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ anyhow = "1.0.38" clap = "3.0.0-beta.2" derive_more = "0.99.11" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] } +itertools = "0.10.0" llvm-sys = "110.0.1" nom = "6.1.2" nom-trace = { git = "https://github.com/glittershark/nom-trace", branch = "nom-6" } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2dcf955fe67c..7f3543271db8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2,10 +2,12 @@ use std::borrow::Cow; use std::convert::TryFrom; use std::fmt::{self, Display, Formatter}; +use itertools::Itertools; + #[derive(Debug, PartialEq, Eq)] pub struct InvalidIdentifier<'a>(Cow<'a, str>); -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct Ident<'a>(pub Cow<'a, str>); impl<'a> From<&'a Ident<'a>> for &'a str { @@ -69,7 +71,7 @@ impl<'a> TryFrom for Ident<'a> { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum BinaryOperator { /// `+` Add, @@ -93,7 +95,7 @@ pub enum BinaryOperator { Neq, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum UnaryOperator { /// ! Not, @@ -102,12 +104,12 @@ pub enum UnaryOperator { Neg, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Literal { Int(u64), } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Expr<'a> { Ident(Ident<'a>), @@ -134,33 +136,101 @@ pub enum Expr<'a> { then: Box>, else_: Box>, }, + + Fun(Box>), + + Call { + fun: Box>, + args: Vec>, + }, } -#[derive(Debug, PartialEq, Eq)] +impl<'a> Expr<'a> { + pub fn to_owned(&self) -> Expr<'static> { + match self { + Expr::Ident(ref id) => Expr::Ident(id.to_owned()), + Expr::Literal(ref lit) => Expr::Literal(lit.clone()), + Expr::UnaryOp { op, rhs } => Expr::UnaryOp { + op: *op, + rhs: Box::new((**rhs).to_owned()), + }, + Expr::BinaryOp { lhs, op, rhs } => Expr::BinaryOp { + lhs: Box::new((**lhs).to_owned()), + op: *op, + rhs: Box::new((**rhs).to_owned()), + }, + Expr::Let { bindings, body } => Expr::Let { + bindings: bindings + .iter() + .map(|(id, expr)| (id.to_owned(), expr.to_owned())) + .collect(), + body: Box::new((**body).to_owned()), + }, + Expr::If { + condition, + then, + else_, + } => Expr::If { + condition: Box::new((**condition).to_owned()), + then: Box::new((**then).to_owned()), + else_: Box::new((**else_).to_owned()), + }, + Expr::Fun(fun) => Expr::Fun(Box::new((**fun).to_owned())), + Expr::Call { fun, args } => Expr::Call { + fun: Box::new((**fun).to_owned()), + args: args.iter().map(|arg| arg.to_owned()).collect(), + }, + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] pub struct Fun<'a> { - pub name: Ident<'a>, pub args: Vec>, pub body: Expr<'a>, } +impl<'a> Fun<'a> { + fn to_owned(&self) -> Fun<'static> { + Fun { + args: self.args.iter().map(|arg| arg.to_owned()).collect(), + body: self.body.to_owned(), + } + } +} + #[derive(Debug, PartialEq, Eq)] pub enum Decl<'a> { - Fun(Fun<'a>), + Fun { name: Ident<'a>, body: Fun<'a> }, +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct FunctionType { + pub args: Vec, + pub ret: Box, +} + +impl Display for FunctionType { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "fn {} -> {}", self.args.iter().join(", "), self.ret) + } } -#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Type { Int, Float, Bool, + Function(FunctionType), } impl Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - Self::Int => f.write_str("int"), - Self::Float => f.write_str("float"), - Self::Bool => f.write_str("bool"), + Type::Int => f.write_str("int"), + Type::Float => f.write_str("float"), + Type::Bool => f.write_str("bool"), + Type::Function(ft) => ft.fmt(f), } } } diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs index ff92c3373176..1d1c742a9434 100644 --- a/src/codegen/llvm.rs +++ b/src/codegen/llvm.rs @@ -1,3 +1,4 @@ +use std::convert::{TryFrom, TryInto}; use std::path::Path; use std::result; @@ -7,7 +8,7 @@ pub use inkwell::context::Context; use inkwell::module::Module; use inkwell::support::LLVMString; use inkwell::types::FunctionType; -use inkwell::values::{BasicValueEnum, FunctionValue}; +use inkwell::values::{AnyValueEnum, BasicValueEnum, FunctionValue}; use inkwell::IntPredicate; use thiserror::Error; @@ -35,8 +36,9 @@ pub struct Codegen<'ctx, 'ast> { context: &'ctx Context, pub module: Module<'ctx>, builder: Builder<'ctx>, - env: Env<'ast, BasicValueEnum<'ctx>>, - function: Option>, + env: Env<'ast, AnyValueEnum<'ctx>>, + function_stack: Vec>, + identifier_counter: u32, } impl<'ctx, 'ast> Codegen<'ctx, 'ast> { @@ -48,7 +50,8 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { module, builder, env: Default::default(), - function: None, + function_stack: Default::default(), + identifier_counter: 0, } } @@ -57,22 +60,24 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { name: &str, ty: FunctionType<'ctx>, ) -> &'a FunctionValue<'ctx> { - self.function = Some(self.module.add_function(name, ty, None)); + self.function_stack + .push(self.module.add_function(name, ty, None)); let basic_block = self.append_basic_block("entry"); self.builder.position_at_end(basic_block); - self.function.as_ref().unwrap() + self.function_stack.last().unwrap() } - pub fn finish_function(&self, res: &BasicValueEnum<'ctx>) { + pub fn finish_function(&mut self, res: &BasicValueEnum<'ctx>) -> FunctionValue<'ctx> { self.builder.build_return(Some(res)); + self.function_stack.pop().unwrap() } pub fn append_basic_block(&self, name: &str) -> BasicBlock<'ctx> { self.context - .append_basic_block(self.function.unwrap(), name) + .append_basic_block(*self.function_stack.last().unwrap(), name) } - pub fn codegen_expr(&mut self, expr: &'ast Expr<'ast>) -> Result> { + pub fn codegen_expr(&mut self, expr: &'ast Expr<'ast>) -> Result> { match expr { Expr::Ident(id) => self .env @@ -81,13 +86,13 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { .ok_or_else(|| Error::UndefinedVariable(id.to_owned())), Expr::Literal(Literal::Int(i)) => { let ty = self.context.i64_type(); - Ok(BasicValueEnum::IntValue(ty.const_int(*i, false))) + Ok(AnyValueEnum::IntValue(ty.const_int(*i, false))) } Expr::UnaryOp { op, rhs } => { let rhs = self.codegen_expr(rhs)?; match op { UnaryOperator::Not => unimplemented!(), - UnaryOperator::Neg => Ok(BasicValueEnum::IntValue( + UnaryOperator::Neg => Ok(AnyValueEnum::IntValue( self.builder.build_int_neg(rhs.into_int_value(), "neg"), )), } @@ -96,29 +101,23 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { let lhs = self.codegen_expr(lhs)?; let rhs = self.codegen_expr(rhs)?; match op { - BinaryOperator::Add => { - Ok(BasicValueEnum::IntValue(self.builder.build_int_add( - lhs.into_int_value(), - rhs.into_int_value(), - "add", - ))) - } - BinaryOperator::Sub => { - Ok(BasicValueEnum::IntValue(self.builder.build_int_sub( - lhs.into_int_value(), - rhs.into_int_value(), - "add", - ))) - } - BinaryOperator::Mul => { - Ok(BasicValueEnum::IntValue(self.builder.build_int_sub( - lhs.into_int_value(), - rhs.into_int_value(), - "add", - ))) - } + BinaryOperator::Add => Ok(AnyValueEnum::IntValue(self.builder.build_int_add( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))), + BinaryOperator::Sub => Ok(AnyValueEnum::IntValue(self.builder.build_int_sub( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))), + BinaryOperator::Mul => Ok(AnyValueEnum::IntValue(self.builder.build_int_sub( + lhs.into_int_value(), + rhs.into_int_value(), + "add", + ))), BinaryOperator::Div => { - Ok(BasicValueEnum::IntValue(self.builder.build_int_signed_div( + Ok(AnyValueEnum::IntValue(self.builder.build_int_signed_div( lhs.into_int_value(), rhs.into_int_value(), "add", @@ -126,7 +125,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { } BinaryOperator::Pow => unimplemented!(), BinaryOperator::Equ => { - Ok(BasicValueEnum::IntValue(self.builder.build_int_compare( + Ok(AnyValueEnum::IntValue(self.builder.build_int_compare( IntPredicate::EQ, lhs.into_int_value(), rhs.into_int_value(), @@ -170,34 +169,83 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { self.builder.position_at_end(join_block); let phi = self.builder.build_phi(self.context.i64_type(), "join"); - phi.add_incoming(&[(&then_res, then_block), (&else_res, else_block)]); - Ok(phi.as_basic_value()) + phi.add_incoming(&[ + (&BasicValueEnum::try_from(then_res).unwrap(), then_block), + (&BasicValueEnum::try_from(else_res).unwrap(), else_block), + ]); + Ok(phi.as_basic_value().into()) + } + Expr::Call { fun, args } => { + if let Expr::Ident(id) = &**fun { + let function = self + .module + .get_function(id.into()) + .or_else(|| self.env.resolve(id)?.clone().try_into().ok()) + .ok_or_else(|| Error::UndefinedVariable(id.to_owned()))?; + let args = args + .iter() + .map(|arg| Ok(self.codegen_expr(arg)?.try_into().unwrap())) + .collect::>>()?; + Ok(self + .builder + .build_call(function, &args, "call") + .try_as_basic_value() + .left() + .unwrap() + .into()) + } else { + todo!() + } + } + Expr::Fun(fun) => { + let Fun { args, body } = &**fun; + let fname = self.fresh_ident("f"); + let cur_block = self.builder.get_insert_block().unwrap(); + let env = self.env.save(); // TODO: closures + let function = self.codegen_function(&fname, args, body)?; + self.builder.position_at_end(cur_block); + self.env.restore(env); + Ok(function.into()) } } } + pub fn codegen_function( + &mut self, + name: &str, + args: &'ast [Ident<'ast>], + body: &'ast Expr<'ast>, + ) -> Result> { + let i64_type = self.context.i64_type(); + self.new_function( + name, + i64_type.fn_type( + args.iter() + .map(|_| i64_type.into()) + .collect::>() + .as_slice(), + false, + ), + ); + self.env.push(); + for (i, arg) in args.iter().enumerate() { + self.env.set( + arg, + self.cur_function().get_nth_param(i as u32).unwrap().into(), + ); + } + let res = self.codegen_expr(body)?.try_into().unwrap(); + self.env.pop(); + Ok(self.finish_function(&res)) + } + pub fn codegen_decl(&mut self, decl: &'ast Decl<'ast>) -> Result<()> { match decl { - Decl::Fun(Fun { name, args, body }) => { - let i64_type = self.context.i64_type(); - self.new_function( - name.into(), - i64_type.fn_type( - args.iter() - .map(|_| i64_type.into()) - .collect::>() - .as_slice(), - false, - ), - ); - self.env.push(); - for (i, arg) in args.iter().enumerate() { - self.env - .set(arg, self.function.unwrap().get_nth_param(i as u32).unwrap()); - } - let res = self.codegen_expr(body)?; - self.env.pop(); - self.finish_function(&res); + Decl::Fun { + name, + body: Fun { args, body }, + } => { + self.codegen_function(name.into(), args, body)?; Ok(()) } } @@ -205,7 +253,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { pub fn codegen_main(&mut self, expr: &'ast Expr<'ast>) -> Result<()> { self.new_function("main", self.context.i64_type().fn_type(&[], false)); - let res = self.codegen_expr(expr)?; + let res = self.codegen_expr(expr)?.try_into().unwrap(); self.finish_function(&res); Ok(()) } @@ -229,6 +277,15 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { )) } } + + fn fresh_ident(&mut self, prefix: &str) -> String { + self.identifier_counter += 1; + format!("{}{}", prefix, self.identifier_counter) + } + + fn cur_function(&self) -> &FunctionValue<'ctx> { + self.function_stack.last().unwrap() + } } #[cfg(test)] @@ -248,9 +305,7 @@ mod tests { .create_jit_execution_engine(OptimizationLevel::None) .unwrap(); - codegen.new_function("test", context.i64_type().fn_type(&[], false)); - let res = codegen.codegen_expr(&expr)?; - codegen.finish_function(&res); + codegen.codegen_function("test", &[], &expr)?; unsafe { let fun: JitFunction T> = @@ -279,4 +334,10 @@ mod tests { 2 ); } + + #[test] + fn function_call() { + let res = jit_eval::("let id = fn x = x in id 1").unwrap(); + assert_eq!(res, 1); + } } diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 4620b6b48e84..6f95d90b45a1 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -14,9 +14,7 @@ pub fn jit_eval(expr: &Expr) -> Result { .module .create_jit_execution_engine(OptimizationLevel::None) .map_err(Error::from)?; - codegen.new_function("eval", context.i64_type().fn_type(&[], false)); - let res = codegen.codegen_expr(&expr)?; - codegen.finish_function(&res); + codegen.codegen_function("test", &[], &expr)?; unsafe { let fun: JitFunction T> = diff --git a/src/commands/compile.rs b/src/commands/compile.rs index e16b8c87a659..be8767575ab5 100644 --- a/src/commands/compile.rs +++ b/src/commands/compile.rs @@ -5,10 +5,13 @@ use clap::Clap; use crate::common::Result; use crate::compiler::{self, CompilerOptions}; +/// Compile a source file #[derive(Clap)] pub struct Compile { + /// File to compile file: PathBuf, + /// Output file #[clap(short = 'o')] out_file: PathBuf, diff --git a/src/common/env.rs b/src/common/env.rs index 8b5cde49e9e4..f499323639e3 100644 --- a/src/common/env.rs +++ b/src/common/env.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::mem; use crate::ast::Ident; @@ -25,6 +26,14 @@ impl<'ast, V> Env<'ast, V> { self.0.pop(); } + pub fn save(&mut self) -> Self { + mem::take(self) + } + + pub fn restore(&mut self, saved: Self) { + *self = saved; + } + pub fn set(&mut self, k: &'ast Ident<'ast>, v: V) { self.0.last_mut().unwrap().insert(k, v); } diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index adff3568c2c3..fc1556d1c292 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -2,13 +2,13 @@ mod error; mod value; pub use self::error::{Error, Result}; -pub use self::value::Value; -use crate::ast::{BinaryOperator, Expr, Ident, Literal, UnaryOperator}; +pub use self::value::{Function, Value}; +use crate::ast::{BinaryOperator, Expr, FunctionType, Ident, Literal, Type, UnaryOperator}; use crate::common::env::Env; #[derive(Debug, Default)] pub struct Interpreter<'a> { - env: Env<'a, Value>, + env: Env<'a, Value<'a>>, } impl<'a> Interpreter<'a> { @@ -16,14 +16,14 @@ impl<'a> Interpreter<'a> { Self::default() } - fn resolve(&self, var: &'a Ident<'a>) -> Result { + fn resolve(&self, var: &'a Ident<'a>) -> Result> { self.env .resolve(var) .cloned() .ok_or_else(|| Error::UndefinedVariable(var.to_owned())) } - pub fn eval(&mut self, expr: &'a Expr<'a>) -> Result { + pub fn eval(&mut self, expr: &'a Expr<'a>) -> Result> { match expr { Expr::Ident(id) => self.resolve(id), Expr::Literal(Literal::Int(i)) => Ok((*i).into()), @@ -63,12 +63,44 @@ impl<'a> Interpreter<'a> { else_, } => { let condition = self.eval(condition)?; - if *(condition.into_type::()?) { + if *(condition.as_type::()?) { self.eval(then) } else { self.eval(else_) } } + Expr::Call { ref fun, args } => { + let fun = self.eval(fun)?; + let expected_type = FunctionType { + args: args.iter().map(|_| Type::Int).collect(), + ret: Box::new(Type::Int), + }; + + let Function { + args: function_args, + body, + .. + } = fun.as_function(expected_type)?; + let arg_values = function_args.iter().zip( + args.iter() + .map(|v| self.eval(v)) + .collect::>>()?, + ); + let mut interpreter = Interpreter::new(); + for (arg_name, arg_value) in arg_values { + interpreter.env.set(arg_name, arg_value); + } + Ok(Value::from(*interpreter.eval(body)?.as_type::()?)) + } + Expr::Fun(fun) => Ok(Value::from(value::Function { + // TODO + type_: FunctionType { + args: fun.args.iter().map(|_| Type::Int).collect(), + ret: Box::new(Type::Int), + }, + args: fun.args.iter().map(|arg| arg.to_owned()).collect(), + body: fun.body.to_owned(), + })), } } } @@ -92,12 +124,12 @@ mod tests { fn parse_eval(src: &str) -> T where - for<'a> &'a T: TryFrom<&'a Val>, + for<'a> &'a T: TryFrom<&'a Val<'a>>, T: Clone + TypeOf, { let expr = crate::parser::expr(src).unwrap().1; let res = eval(&expr).unwrap(); - res.into_type::().unwrap().clone() + res.as_type::().unwrap().clone() } #[test] @@ -108,7 +140,7 @@ mod tests { rhs: int_lit(2), }; let res = eval(&expr).unwrap(); - assert_eq!(*res.into_type::().unwrap(), 2); + assert_eq!(*res.as_type::().unwrap(), 2); } #[test] @@ -122,4 +154,10 @@ mod tests { let res = parse_eval::("let x = 1 in if x == 1 then 2 else 4"); assert_eq!(res, 2); } + + #[test] + fn function_call() { + let res = parse_eval::("let id = fn x = x in id 1"); + assert_eq!(res, 1); + } } diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index 69e4d4ffeb96..496e9c4230de 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -6,108 +6,153 @@ use std::rc::Rc; use derive_more::{Deref, From, TryInto}; use super::{Error, Result}; -use crate::ast::Type; +use crate::ast::{Expr, FunctionType, Ident, Type}; -#[derive(Debug, PartialEq, From, TryInto)] +#[derive(Debug, Clone)] +pub struct Function<'a> { + pub type_: FunctionType, + pub args: Vec>, + pub body: Expr<'a>, +} + +#[derive(From, TryInto)] #[try_into(owned, ref)] -pub enum Val { +pub enum Val<'a> { Int(i64), Float(f64), Bool(bool), + Function(Function<'a>), +} + +impl<'a> fmt::Debug for Val<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Val::Int(x) => f.debug_tuple("Int").field(x).finish(), + Val::Float(x) => f.debug_tuple("Float").field(x).finish(), + Val::Bool(x) => f.debug_tuple("Bool").field(x).finish(), + Val::Function(Function { type_, .. }) => { + f.debug_struct("Function").field("type_", type_).finish() + } + } + } } -impl From for Val { +impl<'a> PartialEq for Val<'a> { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Val::Int(x), Val::Int(y)) => x == y, + (Val::Float(x), Val::Float(y)) => x == y, + (Val::Bool(x), Val::Bool(y)) => x == y, + (Val::Function(_), Val::Function(_)) => false, + (_, _) => false, + } + } +} + +impl<'a> From for Val<'a> { fn from(i: u64) -> Self { Self::from(i as i64) } } -impl Display for Val { +impl<'a> Display for Val<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Val::Int(x) => x.fmt(f), Val::Float(x) => x.fmt(f), Val::Bool(x) => x.fmt(f), + Val::Function(Function { type_, .. }) => write!(f, "<{}>", type_), } } } -impl Val { +impl<'a> Val<'a> { pub fn type_(&self) -> Type { match self { Val::Int(_) => Type::Int, Val::Float(_) => Type::Float, Val::Bool(_) => Type::Bool, + Val::Function(Function { type_, .. }) => Type::Function(type_.clone()), } } - pub fn into_type<'a, T>(&'a self) -> Result<&'a T> + pub fn as_type<'b, T>(&'b self) -> Result<&'b T> where - T: TypeOf + 'a + Clone, - &'a T: TryFrom<&'a Self>, + T: TypeOf + 'b + Clone, + &'b T: TryFrom<&'b Self>, { <&T>::try_from(self).map_err(|_| Error::InvalidType { actual: self.type_(), expected: ::type_of(), }) } + + pub fn as_function<'b>(&'b self, function_type: FunctionType) -> Result<&'b Function<'a>> { + match self { + Val::Function(f) if f.type_ == function_type => Ok(&f), + _ => Err(Error::InvalidType { + actual: self.type_(), + expected: Type::Function(function_type), + }), + } + } } #[derive(Debug, PartialEq, Clone, Deref)] -pub struct Value(Rc); +pub struct Value<'a>(Rc>); -impl Display for Value { +impl<'a> Display for Value<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt(f) } } -impl From for Value +impl<'a, T> From for Value<'a> where - Val: From, + Val<'a>: From, { fn from(x: T) -> Self { Self(Rc::new(x.into())) } } -impl Neg for Value { - type Output = Result; +impl<'a> Neg for Value<'a> { + type Output = Result>; fn neg(self) -> Self::Output { - Ok((-self.into_type::()?).into()) + Ok((-self.as_type::()?).into()) } } -impl Add for Value { - type Output = Result; +impl<'a> Add for Value<'a> { + type Output = Result>; fn add(self, rhs: Self) -> Self::Output { - Ok((self.into_type::()? + rhs.into_type::()?).into()) + Ok((self.as_type::()? + rhs.as_type::()?).into()) } } -impl Sub for Value { - type Output = Result; +impl<'a> Sub for Value<'a> { + type Output = Result>; fn sub(self, rhs: Self) -> Self::Output { - Ok((self.into_type::()? - rhs.into_type::()?).into()) + Ok((self.as_type::()? - rhs.as_type::()?).into()) } } -impl Mul for Value { - type Output = Result; +impl<'a> Mul for Value<'a> { + type Output = Result>; fn mul(self, rhs: Self) -> Self::Output { - Ok((self.into_type::()? * rhs.into_type::()?).into()) + Ok((self.as_type::()? * rhs.as_type::()?).into()) } } -impl Div for Value { - type Output = Result; +impl<'a> Div for Value<'a> { + type Output = Result>; fn div(self, rhs: Self) -> Self::Output { - Ok((self.into_type::()? / rhs.into_type::()?).into()) + Ok((self.as_type::()? / rhs.as_type::()?).into()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 811450da0d61..be432b8adf56 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -156,6 +156,10 @@ where } } +fn is_reserved(s: &str) -> bool { + matches!(s, "if" | "then" | "else" | "let" | "in" | "fn") +} + fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> where E: ParseError<&'a str>, @@ -170,7 +174,12 @@ where } idx += 1; } - Ok((&i[idx..], Ident::from_str_unchecked(&i[..idx]))) + let id = &i[..idx]; + if is_reserved(id) { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Satisfy))) + } else { + Ok((&i[idx..], Ident::from_str_unchecked(id))) + } } else { Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Satisfy))) } @@ -228,14 +237,65 @@ named!(if_(&str) -> Expr, do_parse! ( named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); +named!(paren_expr(&str) -> Expr, + delimited!(complete!(tag!("(")), expr, complete!(tag!(")")))); + +named!(funcref(&str) -> Expr, alt!( + ident_expr | + paren_expr +)); + +named!(no_arg_call(&str) -> Expr, do_parse!( + fun: funcref + >> multispace0 + >> complete!(tag!("()")) + >> (Expr::Call { + fun: Box::new(fun), + args: vec![], + }) +)); + +named!(fun_expr(&str) -> Expr, do_parse!( + tag!("fn") + >> multispace1 + >> args: separated_list0!(multispace1, ident) + >> multispace0 + >> char!('=') + >> multispace0 + >> body: expr + >> (Expr::Fun(Box::new(Fun { + args, + body + }))) +)); + +named!(arg(&str) -> Expr, alt!( + ident_expr | + literal | + paren_expr +)); + +named!(call_with_args(&str) -> Expr, do_parse!( + fun: funcref + >> multispace1 + >> args: separated_list1!(multispace1, arg) + >> (Expr::Call { + fun: Box::new(fun), + args + }) +)); + named!(simple_expr(&str) -> Expr, alt!( let_ | if_ | + fun_expr | literal | ident_expr )); named!(pub expr(&str) -> Expr, alt!( + no_arg_call | + call_with_args | map!(token_tree, |tt| { ExprParser.parse(&mut tt.into_iter()).unwrap() }) | @@ -243,8 +303,8 @@ named!(pub expr(&str) -> Expr, alt!( ////// -named!(fun(&str) -> Fun, do_parse!( - tag!("fn") +named!(fun_decl(&str) -> Decl, do_parse!( + complete!(tag!("fn")) >> multispace0 >> name: ident >> multispace1 @@ -253,21 +313,24 @@ named!(fun(&str) -> Fun, do_parse!( >> char!('=') >> multispace0 >> body: expr - >> (Fun { + >> (Decl::Fun { name, - args, - body + body: Fun { + args, + body + } }) )); named!(pub decl(&str) -> Decl, alt!( - fun => { |f| Decl::Fun(f) } + fun_decl )); -named!(pub toplevel(&str) -> Vec, separated_list0!(multispace1, decl)); +named!(pub toplevel(&str) -> Vec, many0!(decl)); #[cfg(test)] mod tests { + use nom_trace::print_trace; use std::convert::{TryFrom, TryInto}; use super::*; @@ -281,7 +344,9 @@ mod tests { macro_rules! test_parse { ($parser: ident, $src: expr) => {{ - let (rem, res) = $parser($src).unwrap(); + let res = $parser($src); + print_trace!(); + let (rem, res) = res.unwrap(); assert!( rem.is_empty(), "non-empty remainder: \"{}\", parsed: {:?}", @@ -435,11 +500,87 @@ mod tests { let res = test_parse!(decl, "fn id x = x"); assert_eq!( res, - Decl::Fun(Fun { + Decl::Fun { name: "id".try_into().unwrap(), - args: vec!["x".try_into().unwrap()], - body: *ident_expr("x"), - }) + body: Fun { + args: vec!["x".try_into().unwrap()], + body: *ident_expr("x"), + } + } ) } + + #[test] + fn no_arg_call() { + let res = test_parse!(expr, "f()"); + assert_eq!( + res, + Expr::Call { + fun: ident_expr("f"), + args: vec![] + } + ); + } + + #[test] + fn call_with_args() { + let res = test_parse!(expr, "f x 1"); + assert_eq!( + res, + Expr::Call { + fun: ident_expr("f"), + args: vec![*ident_expr("x"), Expr::Literal(Literal::Int(1))] + } + ) + } + + #[test] + fn call_funcref() { + let res = test_parse!(expr, "(let x = 1 in x) 2"); + assert_eq!( + res, + Expr::Call { + fun: Box::new(Expr::Let { + bindings: vec![( + Ident::try_from("x").unwrap(), + Expr::Literal(Literal::Int(1)) + )], + body: ident_expr("x") + }), + args: vec![Expr::Literal(Literal::Int(2))] + } + ) + } + + #[test] + fn anon_function() { + let res = test_parse!(expr, "let id = fn x = x in id 1"); + assert_eq!( + res, + Expr::Let { + bindings: vec![( + Ident::try_from("id").unwrap(), + Expr::Fun(Box::new(Fun { + args: vec![Ident::try_from("x").unwrap()], + body: *ident_expr("x") + })) + )], + body: Box::new(Expr::Call { + fun: ident_expr("id"), + args: vec![Expr::Literal(Literal::Int(1))], + }) + } + ); + } + + #[test] + fn multiple_decls() { + let res = test_parse!( + toplevel, + "fn id x = x + fn plus x y = x + y + fn main = plus (id 2) 7" + ); + assert_eq!(res.len(), 3); + } } -- cgit 1.4.1 From 3dff189499af1ddd60d8fc128b794d15f1cb19ae Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sat, 13 Mar 2021 12:19:44 -0500 Subject: Factor out expr parser into its own module --- src/main.rs | 1 + src/parser/expr.rs | 482 ++++++++++++++++++++++++++++++++++++++++++++++++ src/parser/macros.rs | 15 ++ src/parser/mod.rs | 507 ++------------------------------------------------- 4 files changed, 510 insertions(+), 495 deletions(-) create mode 100644 src/parser/expr.rs create mode 100644 src/parser/macros.rs diff --git a/src/main.rs b/src/main.rs index c31fda32dbc4..b539ebbb3d99 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,6 +6,7 @@ pub(crate) mod commands; pub(crate) mod common; pub mod compiler; pub mod interpreter; +#[macro_use] pub mod parser; pub use common::{Error, Result}; diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 000000000000..a42c7a6c765e --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,482 @@ +use nom::character::complete::{digit1, multispace0, multispace1}; +use nom::{ + alt, char, complete, delimited, do_parse, flat_map, many0, map, named, parse_to, + separated_list0, separated_list1, tag, tuple, +}; +use pratt::{Affix, Associativity, PrattParser, Precedence}; + +use crate::ast::{BinaryOperator, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::parser::ident; + +#[derive(Debug)] +enum TokenTree<'a> { + Prefix(UnaryOperator), + // Postfix(char), + Infix(BinaryOperator), + Primary(Expr<'a>), + Group(Vec>), +} + +named!(prefix(&str) -> TokenTree, map!(alt!( + complete!(char!('-')) => { |_| UnaryOperator::Neg } | + complete!(char!('!')) => { |_| UnaryOperator::Not } +), TokenTree::Prefix)); + +named!(infix(&str) -> TokenTree, map!(alt!( + complete!(tag!("==")) => { |_| BinaryOperator::Equ } | + complete!(tag!("!=")) => { |_| BinaryOperator::Neq } | + complete!(char!('+')) => { |_| BinaryOperator::Add } | + complete!(char!('-')) => { |_| BinaryOperator::Sub } | + complete!(char!('*')) => { |_| BinaryOperator::Mul } | + complete!(char!('/')) => { |_| BinaryOperator::Div } | + complete!(char!('^')) => { |_| BinaryOperator::Pow } +), TokenTree::Infix)); + +named!(primary(&str) -> TokenTree, alt!( + do_parse!( + multispace0 >> + char!('(') >> + multispace0 >> + group: group >> + multispace0 >> + char!(')') >> + multispace0 >> + (TokenTree::Group(group)) + ) | + delimited!(multispace0, simple_expr, multispace0) => { |s| TokenTree::Primary(s) } +)); + +named!( + rest(&str) -> Vec<(TokenTree, Vec, TokenTree)>, + many0!(tuple!( + infix, + delimited!(multispace0, many0!(prefix), multispace0), + primary + // many0!(postfix) + )) +); + +named!(group(&str) -> Vec, do_parse!( + prefix: many0!(prefix) + >> primary: primary + // >> postfix: many0!(postfix) + >> rest: rest + >> ({ + let mut res = prefix; + res.push(primary); + // res.append(&mut postfix); + for (infix, mut prefix, primary/*, mut postfix*/) in rest { + res.push(infix); + res.append(&mut prefix); + res.push(primary); + // res.append(&mut postfix); + } + res + }) +)); + +fn token_tree(i: &str) -> nom::IResult<&str, Vec> { + group(i) +} + +struct ExprParser; + +impl<'a, I> PrattParser for ExprParser +where + I: Iterator>, +{ + type Error = pratt::NoError; + type Input = TokenTree<'a>; + type Output = Expr<'a>; + + fn query(&mut self, input: &Self::Input) -> Result { + use BinaryOperator::*; + use UnaryOperator::*; + + Ok(match input { + TokenTree::Infix(Add) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Sub) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Mul) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Div) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Pow) => Affix::Infix(Precedence(8), Associativity::Right), + TokenTree::Infix(Equ) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Infix(Neq) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Prefix(Neg) => Affix::Prefix(Precedence(6)), + TokenTree::Prefix(Not) => Affix::Prefix(Precedence(6)), + TokenTree::Primary(_) => Affix::Nilfix, + TokenTree::Group(_) => Affix::Nilfix, + }) + } + + fn primary(&mut self, input: Self::Input) -> Result { + Ok(match input { + TokenTree::Primary(expr) => expr, + TokenTree::Group(group) => self.parse(&mut group.into_iter()).unwrap(), + _ => unreachable!(), + }) + } + + fn infix( + &mut self, + lhs: Self::Output, + op: Self::Input, + rhs: Self::Output, + ) -> Result { + let op = match op { + TokenTree::Infix(op) => op, + _ => unreachable!(), + }; + Ok(Expr::BinaryOp { + lhs: Box::new(lhs), + op, + rhs: Box::new(rhs), + }) + } + + fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result { + let op = match op { + TokenTree::Prefix(op) => op, + _ => unreachable!(), + }; + + Ok(Expr::UnaryOp { + op, + rhs: Box::new(rhs), + }) + } + + fn postfix( + &mut self, + _lhs: Self::Output, + _op: Self::Input, + ) -> Result { + unreachable!() + } +} + +named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); + +named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); + +named!(binding(&str) -> (Ident, Expr), do_parse!( + multispace0 + >> ident: ident + >> multispace0 + >> char!('=') + >> multispace0 + >> expr: expr + >> (ident, expr) +)); + +named!(let_(&str) -> Expr, do_parse!( + tag!("let") + >> multispace0 + >> bindings: separated_list1!(alt!(char!(';') | char!('\n')), binding) + >> multispace0 + >> tag!("in") + >> multispace0 + >> body: expr + >> (Expr::Let { + bindings, + body: Box::new(body) + }) +)); + +named!(if_(&str) -> Expr, do_parse! ( + tag!("if") + >> multispace0 + >> condition: expr + >> multispace0 + >> tag!("then") + >> multispace0 + >> then: expr + >> multispace0 + >> tag!("else") + >> multispace0 + >> else_: expr + >> (Expr::If { + condition: Box::new(condition), + then: Box::new(then), + else_: Box::new(else_) + }) +)); + +named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); + +named!(paren_expr(&str) -> Expr, + delimited!(complete!(tag!("(")), expr, complete!(tag!(")")))); + +named!(funcref(&str) -> Expr, alt!( + ident_expr | + paren_expr +)); + +named!(no_arg_call(&str) -> Expr, do_parse!( + fun: funcref + >> multispace0 + >> complete!(tag!("()")) + >> (Expr::Call { + fun: Box::new(fun), + args: vec![], + }) +)); + +named!(fun_expr(&str) -> Expr, do_parse!( + tag!("fn") + >> multispace1 + >> args: separated_list0!(multispace1, ident) + >> multispace0 + >> char!('=') + >> multispace0 + >> body: expr + >> (Expr::Fun(Box::new(Fun { + args, + body + }))) +)); + +named!(arg(&str) -> Expr, alt!( + ident_expr | + literal | + paren_expr +)); + +named!(call_with_args(&str) -> Expr, do_parse!( + fun: funcref + >> multispace1 + >> args: separated_list1!(multispace1, arg) + >> (Expr::Call { + fun: Box::new(fun), + args + }) +)); + +named!(simple_expr(&str) -> Expr, alt!( + let_ | + if_ | + fun_expr | + literal | + ident_expr +)); + +named!(pub expr(&str) -> Expr, alt!( + no_arg_call | + call_with_args | + map!(token_tree, |tt| { + ExprParser.parse(&mut tt.into_iter()).unwrap() + }) | + simple_expr)); + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use std::convert::TryFrom; + use BinaryOperator::*; + use Expr::{BinaryOp, If, Let, UnaryOp}; + use UnaryOperator::*; + + pub(crate) fn ident_expr(s: &str) -> Box { + Box::new(Expr::Ident(Ident::try_from(s).unwrap())) + } + + mod operators { + use super::*; + + #[test] + fn mul_plus() { + let (rem, res) = expr("x*y+z").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn mul_plus_ws() { + let (rem, res) = expr("x * y + z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn unary() { + let (rem, res) = expr("x * -z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(UnaryOp { + op: Neg, + rhs: ident_expr("z"), + }) + } + ) + } + + #[test] + fn mul_literal() { + let (rem, res) = expr("x * 3").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(3))), + } + ) + } + + #[test] + fn equ() { + let res = test_parse!(expr, "x * 7 == 7"); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + }), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + } + } + + #[test] + fn let_complex() { + let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); + assert_eq!( + res, + Let { + bindings: vec![ + ( + Ident::try_from("x").unwrap(), + Expr::Literal(Literal::Int(1)) + ), + ( + Ident::try_from("y").unwrap(), + Expr::BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + ], + body: Box::new(Expr::BinaryOp { + lhs: Box::new(Expr::BinaryOp { + lhs: ident_expr("x"), + op: Add, + rhs: ident_expr("y"), + }), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(4))), + }) + } + ) + } + + #[test] + fn if_simple() { + let res = test_parse!(expr, "if x == 8 then 9 else 20"); + assert_eq!( + res, + If { + condition: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(8))), + }), + then: Box::new(Expr::Literal(Literal::Int(9))), + else_: Box::new(Expr::Literal(Literal::Int(20))) + } + ) + } + + #[test] + fn no_arg_call() { + let res = test_parse!(expr, "f()"); + assert_eq!( + res, + Expr::Call { + fun: ident_expr("f"), + args: vec![] + } + ); + } + + #[test] + fn call_with_args() { + let res = test_parse!(expr, "f x 1"); + assert_eq!( + res, + Expr::Call { + fun: ident_expr("f"), + args: vec![*ident_expr("x"), Expr::Literal(Literal::Int(1))] + } + ) + } + + #[test] + fn call_funcref() { + let res = test_parse!(expr, "(let x = 1 in x) 2"); + assert_eq!( + res, + Expr::Call { + fun: Box::new(Expr::Let { + bindings: vec![( + Ident::try_from("x").unwrap(), + Expr::Literal(Literal::Int(1)) + )], + body: ident_expr("x") + }), + args: vec![Expr::Literal(Literal::Int(2))] + } + ) + } + + #[test] + fn anon_function() { + let res = test_parse!(expr, "let id = fn x = x in id 1"); + assert_eq!( + res, + Expr::Let { + bindings: vec![( + Ident::try_from("id").unwrap(), + Expr::Fun(Box::new(Fun { + args: vec![Ident::try_from("x").unwrap()], + body: *ident_expr("x") + })) + )], + body: Box::new(Expr::Call { + fun: ident_expr("id"), + args: vec![Expr::Literal(Literal::Int(1))], + }) + } + ); + } +} diff --git a/src/parser/macros.rs b/src/parser/macros.rs new file mode 100644 index 000000000000..60db5133dc0f --- /dev/null +++ b/src/parser/macros.rs @@ -0,0 +1,15 @@ +#[macro_use] +macro_rules! test_parse { + ($parser: ident, $src: expr) => {{ + let res = $parser($src); + nom_trace::print_trace!(); + let (rem, res) = res.unwrap(); + assert!( + rem.is_empty(), + "non-empty remainder: \"{}\", parsed: {:?}", + rem, + res + ); + res + }}; +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index be432b8adf56..3e162d449320 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,166 +1,21 @@ -use nom::character::complete::{digit1, multispace0, multispace1}; +use nom::character::complete::{multispace0, multispace1}; use nom::error::{ErrorKind, ParseError}; -use nom::{ - alt, char, complete, delimited, do_parse, flat_map, many0, map, named, parse_to, - separated_list0, separated_list1, tag, tuple, -}; -use pratt::{Affix, Associativity, PrattParser, Precedence}; +use nom::{alt, char, complete, do_parse, many0, named, separated_list0, tag}; -use crate::ast::{BinaryOperator, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; +#[macro_use] +mod macros; +mod expr; -pub type Error = nom::Err>; - -#[derive(Debug)] -enum TokenTree<'a> { - Prefix(UnaryOperator), - // Postfix(char), - Infix(BinaryOperator), - Primary(Expr<'a>), - Group(Vec>), -} - -named!(prefix(&str) -> TokenTree, map!(alt!( - complete!(char!('-')) => { |_| UnaryOperator::Neg } | - complete!(char!('!')) => { |_| UnaryOperator::Not } -), TokenTree::Prefix)); - -named!(infix(&str) -> TokenTree, map!(alt!( - complete!(tag!("==")) => { |_| BinaryOperator::Equ } | - complete!(tag!("!=")) => { |_| BinaryOperator::Neq } | - complete!(char!('+')) => { |_| BinaryOperator::Add } | - complete!(char!('-')) => { |_| BinaryOperator::Sub } | - complete!(char!('*')) => { |_| BinaryOperator::Mul } | - complete!(char!('/')) => { |_| BinaryOperator::Div } | - complete!(char!('^')) => { |_| BinaryOperator::Pow } -), TokenTree::Infix)); - -named!(primary(&str) -> TokenTree, alt!( - do_parse!( - multispace0 >> - char!('(') >> - multispace0 >> - group: group >> - multispace0 >> - char!(')') >> - multispace0 >> - (TokenTree::Group(group)) - ) | - delimited!(multispace0, simple_expr, multispace0) => { |s| TokenTree::Primary(s) } -)); - -named!( - rest(&str) -> Vec<(TokenTree, Vec, TokenTree)>, - many0!(tuple!( - infix, - delimited!(multispace0, many0!(prefix), multispace0), - primary - // many0!(postfix) - )) -); - -named!(group(&str) -> Vec, do_parse!( - prefix: many0!(prefix) - >> primary: primary - // >> postfix: many0!(postfix) - >> rest: rest - >> ({ - let mut res = prefix; - res.push(primary); - // res.append(&mut postfix); - for (infix, mut prefix, primary/*, mut postfix*/) in rest { - res.push(infix); - res.append(&mut prefix); - res.push(primary); - // res.append(&mut postfix); - } - res - }) -)); - -fn token_tree(i: &str) -> nom::IResult<&str, Vec> { - group(i) -} - -struct ExprParser; - -impl<'a, I> PrattParser for ExprParser -where - I: Iterator>, -{ - type Error = pratt::NoError; - type Input = TokenTree<'a>; - type Output = Expr<'a>; - - fn query(&mut self, input: &Self::Input) -> Result { - use BinaryOperator::*; - use UnaryOperator::*; +use crate::ast::{Decl, Fun, Ident}; +pub use expr::expr; - Ok(match input { - TokenTree::Infix(Add) => Affix::Infix(Precedence(6), Associativity::Left), - TokenTree::Infix(Sub) => Affix::Infix(Precedence(6), Associativity::Left), - TokenTree::Infix(Mul) => Affix::Infix(Precedence(7), Associativity::Left), - TokenTree::Infix(Div) => Affix::Infix(Precedence(7), Associativity::Left), - TokenTree::Infix(Pow) => Affix::Infix(Precedence(8), Associativity::Right), - TokenTree::Infix(Equ) => Affix::Infix(Precedence(4), Associativity::Right), - TokenTree::Infix(Neq) => Affix::Infix(Precedence(4), Associativity::Right), - TokenTree::Prefix(Neg) => Affix::Prefix(Precedence(6)), - TokenTree::Prefix(Not) => Affix::Prefix(Precedence(6)), - TokenTree::Primary(_) => Affix::Nilfix, - TokenTree::Group(_) => Affix::Nilfix, - }) - } - - fn primary(&mut self, input: Self::Input) -> Result { - Ok(match input { - TokenTree::Primary(expr) => expr, - TokenTree::Group(group) => self.parse(&mut group.into_iter()).unwrap(), - _ => unreachable!(), - }) - } - - fn infix( - &mut self, - lhs: Self::Output, - op: Self::Input, - rhs: Self::Output, - ) -> Result { - let op = match op { - TokenTree::Infix(op) => op, - _ => unreachable!(), - }; - Ok(Expr::BinaryOp { - lhs: Box::new(lhs), - op, - rhs: Box::new(rhs), - }) - } - - fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result { - let op = match op { - TokenTree::Prefix(op) => op, - _ => unreachable!(), - }; - - Ok(Expr::UnaryOp { - op, - rhs: Box::new(rhs), - }) - } - - fn postfix( - &mut self, - _lhs: Self::Output, - _op: Self::Input, - ) -> Result { - unreachable!() - } -} +pub type Error = nom::Err>; -fn is_reserved(s: &str) -> bool { +pub(crate) fn is_reserved(s: &str) -> bool { matches!(s, "if" | "then" | "else" | "let" | "in" | "fn") } -fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> +pub(crate) fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> where E: ParseError<&'a str>, { @@ -188,121 +43,6 @@ where } } -named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); - -named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); - -named!(binding(&str) -> (Ident, Expr), do_parse!( - multispace0 - >> ident: ident - >> multispace0 - >> char!('=') - >> multispace0 - >> expr: expr - >> (ident, expr) -)); - -named!(let_(&str) -> Expr, do_parse!( - tag!("let") - >> multispace0 - >> bindings: separated_list1!(alt!(char!(';') | char!('\n')), binding) - >> multispace0 - >> tag!("in") - >> multispace0 - >> body: expr - >> (Expr::Let { - bindings, - body: Box::new(body) - }) -)); - -named!(if_(&str) -> Expr, do_parse! ( - tag!("if") - >> multispace0 - >> condition: expr - >> multispace0 - >> tag!("then") - >> multispace0 - >> then: expr - >> multispace0 - >> tag!("else") - >> multispace0 - >> else_: expr - >> (Expr::If { - condition: Box::new(condition), - then: Box::new(then), - else_: Box::new(else_) - }) -)); - -named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); - -named!(paren_expr(&str) -> Expr, - delimited!(complete!(tag!("(")), expr, complete!(tag!(")")))); - -named!(funcref(&str) -> Expr, alt!( - ident_expr | - paren_expr -)); - -named!(no_arg_call(&str) -> Expr, do_parse!( - fun: funcref - >> multispace0 - >> complete!(tag!("()")) - >> (Expr::Call { - fun: Box::new(fun), - args: vec![], - }) -)); - -named!(fun_expr(&str) -> Expr, do_parse!( - tag!("fn") - >> multispace1 - >> args: separated_list0!(multispace1, ident) - >> multispace0 - >> char!('=') - >> multispace0 - >> body: expr - >> (Expr::Fun(Box::new(Fun { - args, - body - }))) -)); - -named!(arg(&str) -> Expr, alt!( - ident_expr | - literal | - paren_expr -)); - -named!(call_with_args(&str) -> Expr, do_parse!( - fun: funcref - >> multispace1 - >> args: separated_list1!(multispace1, arg) - >> (Expr::Call { - fun: Box::new(fun), - args - }) -)); - -named!(simple_expr(&str) -> Expr, alt!( - let_ | - if_ | - fun_expr | - literal | - ident_expr -)); - -named!(pub expr(&str) -> Expr, alt!( - no_arg_call | - call_with_args | - map!(token_tree, |tt| { - ExprParser.parse(&mut tt.into_iter()).unwrap() - }) | - simple_expr)); - -////// - named!(fun_decl(&str) -> Decl, do_parse!( complete!(tag!("fn")) >> multispace0 @@ -330,170 +70,10 @@ named!(pub toplevel(&str) -> Vec, many0!(decl)); #[cfg(test)] mod tests { - use nom_trace::print_trace; - use std::convert::{TryFrom, TryInto}; + use std::convert::TryInto; use super::*; - use BinaryOperator::*; - use Expr::{BinaryOp, If, Let, UnaryOp}; - use UnaryOperator::*; - - fn ident_expr(s: &str) -> Box { - Box::new(Expr::Ident(Ident::try_from(s).unwrap())) - } - - macro_rules! test_parse { - ($parser: ident, $src: expr) => {{ - let res = $parser($src); - print_trace!(); - let (rem, res) = res.unwrap(); - assert!( - rem.is_empty(), - "non-empty remainder: \"{}\", parsed: {:?}", - rem, - res - ); - res - }}; - } - - mod operators { - use super::*; - - #[test] - fn mul_plus() { - let (rem, res) = expr("x*y+z").unwrap(); - assert!(rem.is_empty()); - assert_eq!( - res, - BinaryOp { - lhs: Box::new(BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: ident_expr("y") - }), - op: Add, - rhs: ident_expr("z") - } - ) - } - - #[test] - fn mul_plus_ws() { - let (rem, res) = expr("x * y + z").unwrap(); - assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); - assert_eq!( - res, - BinaryOp { - lhs: Box::new(BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: ident_expr("y") - }), - op: Add, - rhs: ident_expr("z") - } - ) - } - - #[test] - fn unary() { - let (rem, res) = expr("x * -z").unwrap(); - assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); - assert_eq!( - res, - BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: Box::new(UnaryOp { - op: Neg, - rhs: ident_expr("z"), - }) - } - ) - } - - #[test] - fn mul_literal() { - let (rem, res) = expr("x * 3").unwrap(); - assert!(rem.is_empty()); - assert_eq!( - res, - BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: Box::new(Expr::Literal(Literal::Int(3))), - } - ) - } - - #[test] - fn equ() { - let res = test_parse!(expr, "x * 7 == 7"); - assert_eq!( - res, - BinaryOp { - lhs: Box::new(BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: Box::new(Expr::Literal(Literal::Int(7))) - }), - op: Equ, - rhs: Box::new(Expr::Literal(Literal::Int(7))) - } - ) - } - } - - #[test] - fn let_complex() { - let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); - assert_eq!( - res, - Let { - bindings: vec![ - ( - Ident::try_from("x").unwrap(), - Expr::Literal(Literal::Int(1)) - ), - ( - Ident::try_from("y").unwrap(), - Expr::BinaryOp { - lhs: ident_expr("x"), - op: Mul, - rhs: Box::new(Expr::Literal(Literal::Int(7))) - } - ) - ], - body: Box::new(Expr::BinaryOp { - lhs: Box::new(Expr::BinaryOp { - lhs: ident_expr("x"), - op: Add, - rhs: ident_expr("y"), - }), - op: Mul, - rhs: Box::new(Expr::Literal(Literal::Int(4))), - }) - } - ) - } - - #[test] - fn if_simple() { - let res = test_parse!(expr, "if x == 8 then 9 else 20"); - assert_eq!( - res, - If { - condition: Box::new(BinaryOp { - lhs: ident_expr("x"), - op: Equ, - rhs: Box::new(Expr::Literal(Literal::Int(8))), - }), - then: Box::new(Expr::Literal(Literal::Int(9))), - else_: Box::new(Expr::Literal(Literal::Int(20))) - } - ) - } + use expr::tests::ident_expr; #[test] fn fn_decl() { @@ -510,69 +90,6 @@ mod tests { ) } - #[test] - fn no_arg_call() { - let res = test_parse!(expr, "f()"); - assert_eq!( - res, - Expr::Call { - fun: ident_expr("f"), - args: vec![] - } - ); - } - - #[test] - fn call_with_args() { - let res = test_parse!(expr, "f x 1"); - assert_eq!( - res, - Expr::Call { - fun: ident_expr("f"), - args: vec![*ident_expr("x"), Expr::Literal(Literal::Int(1))] - } - ) - } - - #[test] - fn call_funcref() { - let res = test_parse!(expr, "(let x = 1 in x) 2"); - assert_eq!( - res, - Expr::Call { - fun: Box::new(Expr::Let { - bindings: vec![( - Ident::try_from("x").unwrap(), - Expr::Literal(Literal::Int(1)) - )], - body: ident_expr("x") - }), - args: vec![Expr::Literal(Literal::Int(2))] - } - ) - } - - #[test] - fn anon_function() { - let res = test_parse!(expr, "let id = fn x = x in id 1"); - assert_eq!( - res, - Expr::Let { - bindings: vec![( - Ident::try_from("id").unwrap(), - Expr::Fun(Box::new(Fun { - args: vec![Ident::try_from("x").unwrap()], - body: *ident_expr("x") - })) - )], - body: Box::new(Expr::Call { - fun: ident_expr("id"), - args: vec![Expr::Literal(Literal::Int(1))], - }) - } - ); - } - #[test] fn multiple_decls() { let res = test_parse!( -- cgit 1.4.1 From f8beda81fbe8d04883aee71ff4ea078f897c6de4 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sat, 13 Mar 2021 13:12:03 -0500 Subject: Allow exprs+bindings to optionally be ascripted --- src/ast/mod.rs | 33 ++++++++++-- src/codegen/llvm.rs | 9 ++-- src/interpreter/mod.rs | 11 ++-- src/parser/expr.rs | 143 ++++++++++++++++++++++++++++++++++++++++--------- src/parser/mod.rs | 2 + src/parser/type_.rs | 104 +++++++++++++++++++++++++++++++++++ 6 files changed, 264 insertions(+), 38 deletions(-) create mode 100644 src/parser/type_.rs diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7f3543271db8..dc22ac3cdb56 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -109,6 +109,23 @@ pub enum Literal { Int(u64), } +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Binding<'a> { + pub ident: Ident<'a>, + pub type_: Option, + pub body: Expr<'a>, +} + +impl<'a> Binding<'a> { + fn to_owned(&self) -> Binding<'static> { + Binding { + ident: self.ident.to_owned(), + type_: self.type_.clone(), + body: self.body.to_owned(), + } + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub enum Expr<'a> { Ident(Ident<'a>), @@ -127,7 +144,7 @@ pub enum Expr<'a> { }, Let { - bindings: Vec<(Ident<'a>, Expr<'a>)>, + bindings: Vec>, body: Box>, }, @@ -143,6 +160,11 @@ pub enum Expr<'a> { fun: Box>, args: Vec>, }, + + Ascription { + expr: Box>, + type_: Type, + }, } impl<'a> Expr<'a> { @@ -160,10 +182,7 @@ impl<'a> Expr<'a> { rhs: Box::new((**rhs).to_owned()), }, Expr::Let { bindings, body } => Expr::Let { - bindings: bindings - .iter() - .map(|(id, expr)| (id.to_owned(), expr.to_owned())) - .collect(), + bindings: bindings.iter().map(|binding| binding.to_owned()).collect(), body: Box::new((**body).to_owned()), }, Expr::If { @@ -180,6 +199,10 @@ impl<'a> Expr<'a> { fun: Box::new((**fun).to_owned()), args: args.iter().map(|arg| arg.to_owned()).collect(), }, + Expr::Ascription { expr, type_ } => Expr::Ascription { + expr: Box::new((**expr).to_owned()), + type_: type_.clone(), + }, } } } diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs index 1d1c742a9434..1f4a457cd81b 100644 --- a/src/codegen/llvm.rs +++ b/src/codegen/llvm.rs @@ -12,7 +12,7 @@ use inkwell::values::{AnyValueEnum, BasicValueEnum, FunctionValue}; use inkwell::IntPredicate; use thiserror::Error; -use crate::ast::{BinaryOperator, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::ast::{BinaryOperator, Binding, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; use crate::common::env::Env; #[derive(Debug, PartialEq, Eq, Error)] @@ -137,9 +137,9 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { } Expr::Let { bindings, body } => { self.env.push(); - for (id, val) in bindings { - let val = self.codegen_expr(val)?; - self.env.set(id, val); + for Binding { ident, body, .. } in bindings { + let val = self.codegen_expr(body)?; + self.env.set(ident, val); } let res = self.codegen_expr(body); self.env.pop(); @@ -207,6 +207,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { self.env.restore(env); Ok(function.into()) } + Expr::Ascription { expr, .. } => self.codegen_expr(expr), } } diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index fc1556d1c292..00421ee90dc8 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -3,7 +3,9 @@ mod value; pub use self::error::{Error, Result}; pub use self::value::{Function, Value}; -use crate::ast::{BinaryOperator, Expr, FunctionType, Ident, Literal, Type, UnaryOperator}; +use crate::ast::{ + BinaryOperator, Binding, Expr, FunctionType, Ident, Literal, Type, UnaryOperator, +}; use crate::common::env::Env; #[derive(Debug, Default)] @@ -49,9 +51,9 @@ impl<'a> Interpreter<'a> { } Expr::Let { bindings, body } => { self.env.push(); - for (id, val) in bindings { - let val = self.eval(val)?; - self.env.set(id, val); + for Binding { ident, body, .. } in bindings { + let val = self.eval(body)?; + self.env.set(ident, val); } let res = self.eval(body)?; self.env.pop(); @@ -101,6 +103,7 @@ impl<'a> Interpreter<'a> { args: fun.args.iter().map(|arg| arg.to_owned()).collect(), body: fun.body.to_owned(), })), + Expr::Ascription { expr, .. } => self.eval(expr), } } } diff --git a/src/parser/expr.rs b/src/parser/expr.rs index a42c7a6c765e..2fda3e93fae9 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -1,12 +1,12 @@ use nom::character::complete::{digit1, multispace0, multispace1}; use nom::{ - alt, char, complete, delimited, do_parse, flat_map, many0, map, named, parse_to, - separated_list0, separated_list1, tag, tuple, + alt, call, char, complete, delimited, do_parse, flat_map, many0, map, named, opt, parse_to, + preceded, separated_list0, separated_list1, tag, tuple, }; use pratt::{Affix, Associativity, PrattParser, Precedence}; -use crate::ast::{BinaryOperator, Expr, Fun, Ident, Literal, UnaryOperator}; -use crate::parser::ident; +use crate::ast::{BinaryOperator, Binding, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::parser::{ident, type_}; #[derive(Debug)] enum TokenTree<'a> { @@ -158,14 +158,20 @@ named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::In named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); -named!(binding(&str) -> (Ident, Expr), do_parse!( +named!(binding(&str) -> Binding, do_parse!( multispace0 >> ident: ident >> multispace0 + >> type_: opt!(preceded!(tuple!(tag!(":"), multispace0), type_)) + >> multispace0 >> char!('=') >> multispace0 - >> expr: expr - >> (ident, expr) + >> body: expr + >> (Binding { + ident, + type_, + body + }) )); named!(let_(&str) -> Expr, do_parse!( @@ -203,6 +209,25 @@ named!(if_(&str) -> Expr, do_parse! ( named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); +fn ascripted<'a>( + p: impl Fn(&'a str) -> nom::IResult<&'a str, Expr, nom::error::Error<&'a str>> + 'a, +) -> impl Fn(&'a str) -> nom::IResult<&str, Expr, nom::error::Error<&'a str>> { + move |i| { + do_parse!( + i, + expr: p + >> multispace0 + >> complete!(tag!(":")) + >> multispace0 + >> type_: type_ + >> (Expr::Ascription { + expr: Box::new(expr), + type_ + }) + ) + } +} + named!(paren_expr(&str) -> Expr, delimited!(complete!(tag!("(")), expr, complete!(tag!(")")))); @@ -251,7 +276,7 @@ named!(call_with_args(&str) -> Expr, do_parse!( }) )); -named!(simple_expr(&str) -> Expr, alt!( +named!(simple_expr_unascripted(&str) -> Expr, alt!( let_ | if_ | fun_expr | @@ -259,17 +284,24 @@ named!(simple_expr(&str) -> Expr, alt!( ident_expr )); +named!(simple_expr(&str) -> Expr, alt!( + call!(ascripted(simple_expr_unascripted)) | + simple_expr_unascripted +)); + named!(pub expr(&str) -> Expr, alt!( no_arg_call | call_with_args | map!(token_tree, |tt| { ExprParser.parse(&mut tt.into_iter()).unwrap() }) | - simple_expr)); + simple_expr +)); #[cfg(test)] pub(crate) mod tests { use super::*; + use crate::ast::Type; use std::convert::TryFrom; use BinaryOperator::*; use Expr::{BinaryOp, If, Let, UnaryOp}; @@ -374,18 +406,20 @@ pub(crate) mod tests { res, Let { bindings: vec![ - ( - Ident::try_from("x").unwrap(), - Expr::Literal(Literal::Int(1)) - ), - ( - Ident::try_from("y").unwrap(), - Expr::BinaryOp { + Binding { + ident: Ident::try_from("x").unwrap(), + type_: None, + body: Expr::Literal(Literal::Int(1)) + }, + Binding { + ident: Ident::try_from("y").unwrap(), + type_: None, + body: Expr::BinaryOp { lhs: ident_expr("x"), op: Mul, rhs: Box::new(Expr::Literal(Literal::Int(7))) } - ) + } ], body: Box::new(Expr::BinaryOp { lhs: Box::new(Expr::BinaryOp { @@ -448,10 +482,11 @@ pub(crate) mod tests { res, Expr::Call { fun: Box::new(Expr::Let { - bindings: vec![( - Ident::try_from("x").unwrap(), - Expr::Literal(Literal::Int(1)) - )], + bindings: vec![Binding { + ident: Ident::try_from("x").unwrap(), + type_: None, + body: Expr::Literal(Literal::Int(1)) + }], body: ident_expr("x") }), args: vec![Expr::Literal(Literal::Int(2))] @@ -465,13 +500,14 @@ pub(crate) mod tests { assert_eq!( res, Expr::Let { - bindings: vec![( - Ident::try_from("id").unwrap(), - Expr::Fun(Box::new(Fun { + bindings: vec![Binding { + ident: Ident::try_from("id").unwrap(), + type_: None, + body: Expr::Fun(Box::new(Fun { args: vec![Ident::try_from("x").unwrap()], body: *ident_expr("x") })) - )], + }], body: Box::new(Expr::Call { fun: ident_expr("id"), args: vec![Expr::Literal(Literal::Int(1))], @@ -479,4 +515,61 @@ pub(crate) mod tests { } ); } + + mod ascriptions { + use super::*; + + #[test] + fn bare_ascription() { + let res = test_parse!(expr, "1: float"); + assert_eq!( + res, + Expr::Ascription { + expr: Box::new(Expr::Literal(Literal::Int(1))), + type_: Type::Float + } + ) + } + + #[test] + fn fn_body_ascription() { + let res = test_parse!(expr, "let const_1 = fn x = 1: int in const_1 2"); + assert_eq!( + res, + Expr::Let { + bindings: vec![Binding { + ident: Ident::try_from("const_1").unwrap(), + type_: None, + body: Expr::Fun(Box::new(Fun { + args: vec![Ident::try_from("x").unwrap()], + body: Expr::Ascription { + expr: Box::new(Expr::Literal(Literal::Int(1))), + type_: Type::Int, + } + })) + }], + body: Box::new(Expr::Call { + fun: ident_expr("const_1"), + args: vec![Expr::Literal(Literal::Int(2))] + }) + } + ) + } + + #[test] + fn let_binding_ascripted() { + let res = test_parse!(expr, "let x: int = 1 in x"); + assert_eq!( + res, + Expr::Let { + bindings: vec![Binding { + ident: Ident::try_from("x").unwrap(), + type_: Some(Type::Int), + body: Expr::Literal(Literal::Int(1)) + }], + body: ident_expr("x") + } + ) + } + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3e162d449320..af7dff6ff213 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5,9 +5,11 @@ use nom::{alt, char, complete, do_parse, many0, named, separated_list0, tag}; #[macro_use] mod macros; mod expr; +mod type_; use crate::ast::{Decl, Fun, Ident}; pub use expr::expr; +pub use type_::type_; pub type Error = nom::Err>; diff --git a/src/parser/type_.rs b/src/parser/type_.rs new file mode 100644 index 000000000000..076df7d6bd55 --- /dev/null +++ b/src/parser/type_.rs @@ -0,0 +1,104 @@ +use nom::character::complete::{multispace0, multispace1}; +use nom::{alt, delimited, do_parse, map, named, opt, separated_list0, tag, terminated, tuple}; + +use crate::ast::{FunctionType, Type}; + +named!(function_type(&str) -> Type, do_parse!( + tag!("fn") + >> multispace1 + >> args: map!(opt!(terminated!(separated_list0!( + tuple!( + multispace0, + tag!(","), + multispace0 + ), + type_ + ), multispace1)), |args| args.unwrap_or_default()) + >> tag!("->") + >> multispace1 + >> ret: type_ + >> (Type::Function(FunctionType { + args, + ret: Box::new(ret) + })) +)); + +named!(pub type_(&str) -> Type, alt!( + tag!("int") => { |_| Type::Int } | + tag!("float") => { |_| Type::Float } | + tag!("bool") => { |_| Type::Bool } | + function_type | + delimited!( + tuple!(tag!("("), multispace0), + type_, + tuple!(tag!(")"), multispace0) + ) +)); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn simple_types() { + assert_eq!(test_parse!(type_, "int"), Type::Int); + assert_eq!(test_parse!(type_, "float"), Type::Float); + assert_eq!(test_parse!(type_, "bool"), Type::Bool); + } + + #[test] + fn no_arg_fn_type() { + assert_eq!( + test_parse!(type_, "fn -> int"), + Type::Function(FunctionType { + args: vec![], + ret: Box::new(Type::Int) + }) + ); + } + + #[test] + fn fn_type_with_args() { + assert_eq!( + test_parse!(type_, "fn int, bool -> int"), + Type::Function(FunctionType { + args: vec![Type::Int, Type::Bool], + ret: Box::new(Type::Int) + }) + ); + } + + #[test] + fn fn_taking_fn() { + assert_eq!( + test_parse!(type_, "fn fn int, bool -> bool, float -> float"), + Type::Function(FunctionType { + args: vec![ + Type::Function(FunctionType { + args: vec![Type::Int, Type::Bool], + ret: Box::new(Type::Bool) + }), + Type::Float + ], + ret: Box::new(Type::Float) + }) + ) + } + + #[test] + fn parenthesized() { + assert_eq!( + test_parse!(type_, "fn (fn int, bool -> bool), float -> float"), + Type::Function(FunctionType { + args: vec![ + Type::Function(FunctionType { + args: vec![Type::Int, Type::Bool], + ret: Box::new(Type::Bool) + }), + Type::Float + ], + ret: Box::new(Type::Float) + }) + ) + } +} -- cgit 1.4.1 From 32a5c0ff0fc58aa6721c1e0ad41950bde2d66744 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sat, 13 Mar 2021 21:57:27 -0500 Subject: Add the start of a hindley-milner typechecker The beginning of a parse-don't-validate-based hindley-milner typechecker, which returns on success an IR where every AST node trivially knows its own type, and using those types to determine LLVM types in codegen. --- Cargo.lock | 1 + Cargo.toml | 1 + ach/.gitignore | 3 + src/ast/hir.rs | 246 ++++++++++++++++++++++ src/ast/mod.rs | 3 + src/codegen/llvm.rs | 76 ++++--- src/codegen/mod.rs | 5 +- src/commands/check.rs | 39 ++++ src/commands/eval.rs | 6 +- src/commands/mod.rs | 2 + src/common/env.rs | 24 ++- src/common/error.rs | 11 +- src/compiler.rs | 4 +- src/interpreter/mod.rs | 70 ++++--- src/interpreter/value.rs | 5 +- src/main.rs | 3 + src/parser/expr.rs | 25 ++- src/parser/macros.rs | 1 + src/parser/mod.rs | 5 +- src/tc/mod.rs | 528 +++++++++++++++++++++++++++++++++++++++++++++++ 20 files changed, 980 insertions(+), 78 deletions(-) create mode 100644 src/ast/hir.rs create mode 100644 src/commands/check.rs create mode 100644 src/tc/mod.rs diff --git a/Cargo.lock b/Cargo.lock index d8eaedeca181..8ec5ad6cf952 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,6 +9,7 @@ dependencies = [ "derive_more", "inkwell", "itertools", + "lazy_static", "llvm-sys", "nom", "nom-trace", diff --git a/Cargo.toml b/Cargo.toml index c9796a821586..2ac7d2540961 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ clap = "3.0.0-beta.2" derive_more = "0.99.11" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] } itertools = "0.10.0" +lazy_static = "1.4.0" llvm-sys = "110.0.1" nom = "6.1.2" nom-trace = { git = "https://github.com/glittershark/nom-trace", branch = "nom-6" } diff --git a/ach/.gitignore b/ach/.gitignore index e8423ae351b8..683a53a01f6c 100644 --- a/ach/.gitignore +++ b/ach/.gitignore @@ -1,2 +1,5 @@ *.ll *.o + +functions +simple diff --git a/src/ast/hir.rs b/src/ast/hir.rs new file mode 100644 index 000000000000..151ddd529872 --- /dev/null +++ b/src/ast/hir.rs @@ -0,0 +1,246 @@ +use itertools::Itertools; + +use super::{BinaryOperator, Ident, Literal, UnaryOperator}; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Binding<'a, T> { + pub ident: Ident<'a>, + pub type_: T, + pub body: Expr<'a, T>, +} + +impl<'a, T> Binding<'a, T> { + fn to_owned(&self) -> Binding<'static, T> + where + T: Clone, + { + Binding { + ident: self.ident.to_owned(), + type_: self.type_.clone(), + body: self.body.to_owned(), + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum Expr<'a, T> { + Ident(Ident<'a>, T), + + Literal(Literal, T), + + UnaryOp { + op: UnaryOperator, + rhs: Box>, + type_: T, + }, + + BinaryOp { + lhs: Box>, + op: BinaryOperator, + rhs: Box>, + type_: T, + }, + + Let { + bindings: Vec>, + body: Box>, + type_: T, + }, + + If { + condition: Box>, + then: Box>, + else_: Box>, + type_: T, + }, + + Fun { + args: Vec<(Ident<'a>, T)>, + body: Box>, + type_: T, + }, + + Call { + fun: Box>, + args: Vec>, + type_: T, + }, +} + +impl<'a, T> Expr<'a, T> { + pub fn type_(&self) -> &T { + match self { + Expr::Ident(_, t) => t, + Expr::Literal(_, t) => t, + Expr::UnaryOp { type_, .. } => type_, + Expr::BinaryOp { type_, .. } => type_, + Expr::Let { type_, .. } => type_, + Expr::If { type_, .. } => type_, + Expr::Fun { type_, .. } => type_, + Expr::Call { type_, .. } => type_, + } + } + + pub fn traverse_type(self, f: F) -> Result, E> + where + F: Fn(T) -> Result + Clone, + { + match self { + Expr::Ident(id, t) => Ok(Expr::Ident(id, f(t)?)), + Expr::Literal(lit, t) => Ok(Expr::Literal(lit, f(t)?)), + Expr::UnaryOp { op, rhs, type_ } => Ok(Expr::UnaryOp { + op, + rhs: Box::new(rhs.traverse_type(f.clone())?), + type_: f(type_)?, + }), + Expr::BinaryOp { + lhs, + op, + rhs, + type_, + } => Ok(Expr::BinaryOp { + lhs: Box::new(lhs.traverse_type(f.clone())?), + op, + rhs: Box::new(rhs.traverse_type(f.clone())?), + type_: f(type_)?, + }), + Expr::Let { + bindings, + body, + type_, + } => Ok(Expr::Let { + bindings: bindings + .into_iter() + .map(|Binding { ident, type_, body }| { + Ok(Binding { + ident, + type_: f(type_)?, + body: body.traverse_type(f.clone())?, + }) + }) + .collect::, E>>()?, + body: Box::new(body.traverse_type(f.clone())?), + type_: f(type_)?, + }), + Expr::If { + condition, + then, + else_, + type_, + } => Ok(Expr::If { + condition: Box::new(condition.traverse_type(f.clone())?), + then: Box::new(then.traverse_type(f.clone())?), + else_: Box::new(else_.traverse_type(f.clone())?), + type_: f(type_)?, + }), + Expr::Fun { args, body, type_ } => Ok(Expr::Fun { + args: args + .into_iter() + .map(|(id, t)| Ok((id, f.clone()(t)?))) + .collect::, E>>()?, + body: Box::new(body.traverse_type(f.clone())?), + type_: f(type_)?, + }), + Expr::Call { fun, args, type_ } => Ok(Expr::Call { + fun: Box::new(fun.traverse_type(f.clone())?), + args: args + .into_iter() + .map(|e| e.traverse_type(f.clone())) + .collect::, E>>()?, + type_: f(type_)?, + }), + } + } + + pub fn to_owned(&self) -> Expr<'static, T> + where + T: Clone, + { + match self { + Expr::Ident(id, t) => Expr::Ident(id.to_owned(), t.clone()), + Expr::Literal(lit, t) => Expr::Literal(lit.clone(), t.clone()), + Expr::UnaryOp { op, rhs, type_ } => Expr::UnaryOp { + op: *op, + rhs: Box::new((**rhs).to_owned()), + type_: type_.clone(), + }, + Expr::BinaryOp { + lhs, + op, + rhs, + type_, + } => Expr::BinaryOp { + lhs: Box::new((**lhs).to_owned()), + op: *op, + rhs: Box::new((**rhs).to_owned()), + type_: type_.clone(), + }, + Expr::Let { + bindings, + body, + type_, + } => Expr::Let { + bindings: bindings.into_iter().map(|b| b.to_owned()).collect(), + body: Box::new((**body).to_owned()), + type_: type_.clone(), + }, + Expr::If { + condition, + then, + else_, + type_, + } => Expr::If { + condition: Box::new((**condition).to_owned()), + then: Box::new((**then).to_owned()), + else_: Box::new((**else_).to_owned()), + type_: type_.clone(), + }, + Expr::Fun { args, body, type_ } => Expr::Fun { + args: args + .into_iter() + .map(|(id, t)| (id.to_owned(), t.clone())) + .collect(), + body: Box::new((**body).to_owned()), + type_: type_.clone(), + }, + Expr::Call { fun, args, type_ } => Expr::Call { + fun: Box::new((**fun).to_owned()), + args: args.into_iter().map(|e| e.to_owned()).collect(), + type_: type_.clone(), + }, + } + } +} + +pub enum Decl<'a, T> { + Fun { + name: Ident<'a>, + args: Vec<(Ident<'a>, T)>, + body: Box>, + type_: T, + }, +} + +impl<'a, T> Decl<'a, T> { + pub fn traverse_type(self, f: F) -> Result, E> + where + F: Fn(T) -> Result + Clone, + { + match self { + Decl::Fun { + name, + args, + body, + type_, + } => Ok(Decl::Fun { + name, + args: args + .into_iter() + .map(|(id, t)| Ok((id, f(t)?))) + .try_collect()?, + body: Box::new(body.traverse_type(f.clone())?), + type_: f(type_)?, + }), + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index dc22ac3cdb56..cef366d16e04 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,3 +1,5 @@ +pub(crate) mod hir; + use std::borrow::Cow; use std::convert::TryFrom; use std::fmt::{self, Display, Formatter}; @@ -107,6 +109,7 @@ pub enum UnaryOperator { #[derive(Debug, PartialEq, Eq, Clone)] pub enum Literal { Int(u64), + Bool(bool), } #[derive(Debug, PartialEq, Eq, Clone)] diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs index 1f4a457cd81b..5b5db90a1ad8 100644 --- a/src/codegen/llvm.rs +++ b/src/codegen/llvm.rs @@ -7,12 +7,13 @@ use inkwell::builder::Builder; pub use inkwell::context::Context; use inkwell::module::Module; use inkwell::support::LLVMString; -use inkwell::types::FunctionType; +use inkwell::types::{BasicType, BasicTypeEnum, FunctionType, IntType}; use inkwell::values::{AnyValueEnum, BasicValueEnum, FunctionValue}; use inkwell::IntPredicate; use thiserror::Error; -use crate::ast::{BinaryOperator, Binding, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::ast::hir::{Binding, Decl, Expr}; +use crate::ast::{BinaryOperator, Ident, Literal, Type, UnaryOperator}; use crate::common::env::Env; #[derive(Debug, PartialEq, Eq, Error)] @@ -36,7 +37,7 @@ pub struct Codegen<'ctx, 'ast> { context: &'ctx Context, pub module: Module<'ctx>, builder: Builder<'ctx>, - env: Env<'ast, AnyValueEnum<'ctx>>, + env: Env<&'ast Ident<'ast>, AnyValueEnum<'ctx>>, function_stack: Vec>, identifier_counter: u32, } @@ -77,18 +78,23 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { .append_basic_block(*self.function_stack.last().unwrap(), name) } - pub fn codegen_expr(&mut self, expr: &'ast Expr<'ast>) -> Result> { + pub fn codegen_expr(&mut self, expr: &'ast Expr<'ast, Type>) -> Result> { match expr { - Expr::Ident(id) => self + Expr::Ident(id, _) => self .env .resolve(id) .cloned() .ok_or_else(|| Error::UndefinedVariable(id.to_owned())), - Expr::Literal(Literal::Int(i)) => { - let ty = self.context.i64_type(); - Ok(AnyValueEnum::IntValue(ty.const_int(*i, false))) + Expr::Literal(lit, ty) => { + let ty = self.codegen_int_type(ty); + match lit { + Literal::Int(i) => Ok(AnyValueEnum::IntValue(ty.const_int(*i, false))), + Literal::Bool(b) => Ok(AnyValueEnum::IntValue( + ty.const_int(if *b { 1 } else { 0 }, false), + )), + } } - Expr::UnaryOp { op, rhs } => { + Expr::UnaryOp { op, rhs, .. } => { let rhs = self.codegen_expr(rhs)?; match op { UnaryOperator::Not => unimplemented!(), @@ -97,7 +103,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { )), } } - Expr::BinaryOp { lhs, op, rhs } => { + Expr::BinaryOp { lhs, op, rhs, .. } => { let lhs = self.codegen_expr(lhs)?; let rhs = self.codegen_expr(rhs)?; match op { @@ -135,7 +141,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { BinaryOperator::Neq => todo!(), } } - Expr::Let { bindings, body } => { + Expr::Let { bindings, body, .. } => { self.env.push(); for Binding { ident, body, .. } in bindings { let val = self.codegen_expr(body)?; @@ -149,6 +155,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { condition, then, else_, + type_, } => { let then_block = self.append_basic_block("then"); let else_block = self.append_basic_block("else"); @@ -168,15 +175,15 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { self.builder.build_unconditional_branch(join_block); self.builder.position_at_end(join_block); - let phi = self.builder.build_phi(self.context.i64_type(), "join"); + let phi = self.builder.build_phi(self.codegen_type(type_), "join"); phi.add_incoming(&[ (&BasicValueEnum::try_from(then_res).unwrap(), then_block), (&BasicValueEnum::try_from(else_res).unwrap(), else_block), ]); Ok(phi.as_basic_value().into()) } - Expr::Call { fun, args } => { - if let Expr::Ident(id) = &**fun { + Expr::Call { fun, args, .. } => { + if let Expr::Ident(id, _) = &**fun { let function = self .module .get_function(id.into()) @@ -197,8 +204,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { todo!() } } - Expr::Fun(fun) => { - let Fun { args, body } = &**fun; + Expr::Fun { args, body, .. } => { let fname = self.fresh_ident("f"); let cur_block = self.builder.get_insert_block().unwrap(); let env = self.env.save(); // TODO: closures @@ -207,29 +213,27 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { self.env.restore(env); Ok(function.into()) } - Expr::Ascription { expr, .. } => self.codegen_expr(expr), } } pub fn codegen_function( &mut self, name: &str, - args: &'ast [Ident<'ast>], - body: &'ast Expr<'ast>, + args: &'ast [(Ident<'ast>, Type)], + body: &'ast Expr<'ast, Type>, ) -> Result> { - let i64_type = self.context.i64_type(); self.new_function( name, - i64_type.fn_type( + self.codegen_type(body.type_()).fn_type( args.iter() - .map(|_| i64_type.into()) + .map(|(_, at)| self.codegen_type(at)) .collect::>() .as_slice(), false, ), ); self.env.push(); - for (i, arg) in args.iter().enumerate() { + for (i, (arg, _)) in args.iter().enumerate() { self.env.set( arg, self.cur_function().get_nth_param(i as u32).unwrap().into(), @@ -240,11 +244,10 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { Ok(self.finish_function(&res)) } - pub fn codegen_decl(&mut self, decl: &'ast Decl<'ast>) -> Result<()> { + pub fn codegen_decl(&mut self, decl: &'ast Decl<'ast, Type>) -> Result<()> { match decl { Decl::Fun { - name, - body: Fun { args, body }, + name, args, body, .. } => { self.codegen_function(name.into(), args, body)?; Ok(()) @@ -252,13 +255,28 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { } } - pub fn codegen_main(&mut self, expr: &'ast Expr<'ast>) -> Result<()> { + pub fn codegen_main(&mut self, expr: &'ast Expr<'ast, Type>) -> Result<()> { self.new_function("main", self.context.i64_type().fn_type(&[], false)); let res = self.codegen_expr(expr)?.try_into().unwrap(); - self.finish_function(&res); + if *expr.type_() != Type::Int { + self.builder + .build_return(Some(&self.context.i64_type().const_int(0, false))); + } else { + self.finish_function(&res); + } Ok(()) } + fn codegen_type(&self, type_: &'ast Type) -> BasicTypeEnum<'ctx> { + // TODO + self.context.i64_type().into() + } + + fn codegen_int_type(&self, type_: &'ast Type) -> IntType<'ctx> { + // TODO + self.context.i64_type() + } + pub fn print_to_file

(&self, path: P) -> Result<()> where P: AsRef, @@ -299,6 +317,8 @@ mod tests { fn jit_eval(expr: &str) -> anyhow::Result { let expr = crate::parser::expr(expr).unwrap().1; + let expr = crate::tc::typecheck_expr(expr).unwrap(); + let context = Context::create(); let mut codegen = Codegen::new(&context, "test"); let execution_engine = codegen diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 6f95d90b45a1..8ef057dba04f 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -4,10 +4,11 @@ use inkwell::execution_engine::JitFunction; use inkwell::OptimizationLevel; pub use llvm::*; -use crate::ast::Expr; +use crate::ast::hir::Expr; +use crate::ast::Type; use crate::common::Result; -pub fn jit_eval(expr: &Expr) -> Result { +pub fn jit_eval(expr: &Expr) -> Result { let context = Context::create(); let mut codegen = Codegen::new(&context, "eval"); let execution_engine = codegen diff --git a/src/commands/check.rs b/src/commands/check.rs new file mode 100644 index 000000000000..40de288a282c --- /dev/null +++ b/src/commands/check.rs @@ -0,0 +1,39 @@ +use clap::Clap; +use std::path::PathBuf; + +use crate::ast::Type; +use crate::{parser, tc, Result}; + +/// Typecheck a file or expression +#[derive(Clap)] +pub struct Check { + /// File to check + path: Option, + + /// Expression to check + #[clap(long, short = 'e')] + expr: Option, +} + +fn run_expr(expr: String) -> Result { + let (_, parsed) = parser::expr(&expr)?; + let hir_expr = tc::typecheck_expr(parsed)?; + Ok(hir_expr.type_().clone()) +} + +fn run_path(path: PathBuf) -> Result { + todo!() +} + +impl Check { + pub fn run(self) -> Result<()> { + let type_ = match (self.path, self.expr) { + (None, None) => Err("Must specify either a file or expression to check".into()), + (Some(_), Some(_)) => Err("Cannot specify both a file and expression to check".into()), + (None, Some(expr)) => run_expr(expr), + (Some(path), None) => run_path(path), + }?; + println!("type: {}", type_); + Ok(()) + } +} diff --git a/src/commands/eval.rs b/src/commands/eval.rs index 112bee64625b..61a712c08a8e 100644 --- a/src/commands/eval.rs +++ b/src/commands/eval.rs @@ -3,6 +3,7 @@ use clap::Clap; use crate::codegen; use crate::interpreter; use crate::parser; +use crate::tc; use crate::Result; /// Evaluate an expression and print its result @@ -19,10 +20,11 @@ pub struct Eval { impl Eval { pub fn run(self) -> Result<()> { let (_, parsed) = parser::expr(&self.expr)?; + let hir = tc::typecheck_expr(parsed)?; let result = if self.jit { - codegen::jit_eval::(&parsed)?.into() + codegen::jit_eval::(&hir)?.into() } else { - interpreter::eval(&parsed)? + interpreter::eval(&hir)? }; println!("{}", result); Ok(()) diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 9c0038dabfb1..fd0a822708c2 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1,5 +1,7 @@ +pub mod check; pub mod compile; pub mod eval; +pub use check::Check; pub use compile::Compile; pub use eval::Eval; diff --git a/src/common/env.rs b/src/common/env.rs index f499323639e3..59a5e46c466f 100644 --- a/src/common/env.rs +++ b/src/common/env.rs @@ -1,19 +1,25 @@ +use std::borrow::Borrow; use std::collections::HashMap; +use std::hash::Hash; use std::mem; -use crate::ast::Ident; - /// A lexical environment #[derive(Debug, PartialEq, Eq)] -pub struct Env<'ast, V>(Vec, V>>); +pub struct Env(Vec>); -impl<'ast, V> Default for Env<'ast, V> { +impl Default for Env +where + K: Eq + Hash, +{ fn default() -> Self { Self::new() } } -impl<'ast, V> Env<'ast, V> { +impl Env +where + K: Eq + Hash, +{ pub fn new() -> Self { Self(vec![Default::default()]) } @@ -34,11 +40,15 @@ impl<'ast, V> Env<'ast, V> { *self = saved; } - pub fn set(&mut self, k: &'ast Ident<'ast>, v: V) { + pub fn set(&mut self, k: K, v: V) { self.0.last_mut().unwrap().insert(k, v); } - pub fn resolve<'a>(&'a self, k: &'ast Ident<'ast>) -> Option<&'a V> { + pub fn resolve<'a, Q>(&'a self, k: &Q) -> Option<&'a V> + where + K: Borrow, + Q: Hash + Eq + ?Sized, + { for ctx in self.0.iter().rev() { if let Some(res) = ctx.get(k) { return Some(res); diff --git a/src/common/error.rs b/src/common/error.rs index f3f3023ceaf8..51575a895e91 100644 --- a/src/common/error.rs +++ b/src/common/error.rs @@ -2,7 +2,7 @@ use std::{io, result}; use thiserror::Error; -use crate::{codegen, interpreter, parser}; +use crate::{codegen, interpreter, parser, tc}; #[derive(Error, Debug)] pub enum Error { @@ -18,6 +18,9 @@ pub enum Error { #[error("Compile error: {0}")] CodegenError(#[from] codegen::Error), + #[error("Type error: {0}")] + TypeError(#[from] tc::Error), + #[error("{0}")] Message(String), } @@ -28,6 +31,12 @@ impl From for Error { } } +impl<'a> From<&'a str> for Error { + fn from(s: &'a str) -> Self { + Self::Message(s.to_owned()) + } +} + impl<'a> From>> for Error { fn from(e: nom::Err>) -> Self { use nom::error::Error as NomError; diff --git a/src/compiler.rs b/src/compiler.rs index 5f8e1ef4fa03..f925b267df57 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -8,7 +8,7 @@ use test_strategy::Arbitrary; use crate::codegen::{self, Codegen}; use crate::common::Result; -use crate::parser; +use crate::{parser, tc}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Arbitrary)] pub enum OutputFormat { @@ -55,6 +55,8 @@ pub struct CompilerOptions { pub fn compile_file(input: &Path, output: &Path, options: &CompilerOptions) -> Result<()> { let src = fs::read_to_string(input)?; let (_, decls) = parser::toplevel(&src)?; // TODO: statements + let decls = tc::typecheck_toplevel(decls)?; + let context = codegen::Context::create(); let mut codegen = Codegen::new( &context, diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index 00421ee90dc8..85a8928cbf9a 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -3,14 +3,13 @@ mod value; pub use self::error::{Error, Result}; pub use self::value::{Function, Value}; -use crate::ast::{ - BinaryOperator, Binding, Expr, FunctionType, Ident, Literal, Type, UnaryOperator, -}; +use crate::ast::hir::{Binding, Expr}; +use crate::ast::{BinaryOperator, FunctionType, Ident, Literal, Type, UnaryOperator}; use crate::common::env::Env; #[derive(Debug, Default)] pub struct Interpreter<'a> { - env: Env<'a, Value<'a>>, + env: Env<&'a Ident<'a>, Value<'a>>, } impl<'a> Interpreter<'a> { @@ -25,18 +24,19 @@ impl<'a> Interpreter<'a> { .ok_or_else(|| Error::UndefinedVariable(var.to_owned())) } - pub fn eval(&mut self, expr: &'a Expr<'a>) -> Result> { - match expr { - Expr::Ident(id) => self.resolve(id), - Expr::Literal(Literal::Int(i)) => Ok((*i).into()), - Expr::UnaryOp { op, rhs } => { + pub fn eval(&mut self, expr: &'a Expr<'a, Type>) -> Result> { + let res = match expr { + Expr::Ident(id, _) => self.resolve(id), + Expr::Literal(Literal::Int(i), _) => Ok((*i).into()), + Expr::Literal(Literal::Bool(b), _) => Ok((*b).into()), + Expr::UnaryOp { op, rhs, .. } => { let rhs = self.eval(rhs)?; match op { UnaryOperator::Neg => -rhs, _ => unimplemented!(), } } - Expr::BinaryOp { lhs, op, rhs } => { + Expr::BinaryOp { lhs, op, rhs, .. } => { let lhs = self.eval(lhs)?; let rhs = self.eval(rhs)?; match op { @@ -49,7 +49,7 @@ impl<'a> Interpreter<'a> { BinaryOperator::Neq => todo!(), } } - Expr::Let { bindings, body } => { + Expr::Let { bindings, body, .. } => { self.env.push(); for Binding { ident, body, .. } in bindings { let val = self.eval(body)?; @@ -63,6 +63,7 @@ impl<'a> Interpreter<'a> { condition, then, else_, + .. } => { let condition = self.eval(condition)?; if *(condition.as_type::()?) { @@ -71,7 +72,7 @@ impl<'a> Interpreter<'a> { self.eval(else_) } } - Expr::Call { ref fun, args } => { + Expr::Call { ref fun, args, .. } => { let fun = self.eval(fun)?; let expected_type = FunctionType { args: args.iter().map(|_| Type::Int).collect(), @@ -94,21 +95,26 @@ impl<'a> Interpreter<'a> { } Ok(Value::from(*interpreter.eval(body)?.as_type::()?)) } - Expr::Fun(fun) => Ok(Value::from(value::Function { - // TODO - type_: FunctionType { - args: fun.args.iter().map(|_| Type::Int).collect(), - ret: Box::new(Type::Int), - }, - args: fun.args.iter().map(|arg| arg.to_owned()).collect(), - body: fun.body.to_owned(), - })), - Expr::Ascription { expr, .. } => self.eval(expr), - } + Expr::Fun { args, body, type_ } => { + let type_ = match type_ { + Type::Function(ft) => ft.clone(), + _ => unreachable!("Function expression without function type"), + }; + + Ok(Value::from(value::Function { + // TODO + type_, + args: args.iter().map(|(arg, _)| arg.to_owned()).collect(), + body: (**body).to_owned(), + })) + } + }?; + debug_assert_eq!(&res.type_(), expr.type_()); + Ok(res) } } -pub fn eval<'a>(expr: &'a Expr<'a>) -> Result { +pub fn eval<'a>(expr: &'a Expr<'a, Type>) -> Result { let mut interpreter = Interpreter::new(); interpreter.eval(expr) } @@ -121,17 +127,18 @@ mod tests { use super::*; use BinaryOperator::*; - fn int_lit(i: u64) -> Box> { - Box::new(Expr::Literal(Literal::Int(i))) + fn int_lit(i: u64) -> Box> { + Box::new(Expr::Literal(Literal::Int(i), Type::Int)) } - fn parse_eval(src: &str) -> T + fn do_eval(src: &str) -> T where for<'a> &'a T: TryFrom<&'a Val<'a>>, T: Clone + TypeOf, { let expr = crate::parser::expr(src).unwrap().1; - let res = eval(&expr).unwrap(); + let hir = crate::tc::typecheck_expr(expr).unwrap(); + let res = eval(&hir).unwrap(); res.as_type::().unwrap().clone() } @@ -141,6 +148,7 @@ mod tests { lhs: int_lit(1), op: Mul, rhs: int_lit(2), + type_: Type::Int, }; let res = eval(&expr).unwrap(); assert_eq!(*res.as_type::().unwrap(), 2); @@ -148,19 +156,19 @@ mod tests { #[test] fn variable_shadowing() { - let res = parse_eval::("let x = 1 in (let x = 2 in x) + x"); + let res = do_eval::("let x = 1 in (let x = 2 in x) + x"); assert_eq!(res, 3); } #[test] fn conditional_with_equals() { - let res = parse_eval::("let x = 1 in if x == 1 then 2 else 4"); + let res = do_eval::("let x = 1 in if x == 1 then 2 else 4"); assert_eq!(res, 2); } #[test] fn function_call() { - let res = parse_eval::("let id = fn x = x in id 1"); + let res = do_eval::("let id = fn x = x in id 1"); assert_eq!(res, 1); } } diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index 496e9c4230de..5e55825160cd 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -6,13 +6,14 @@ use std::rc::Rc; use derive_more::{Deref, From, TryInto}; use super::{Error, Result}; -use crate::ast::{Expr, FunctionType, Ident, Type}; +use crate::ast::hir::Expr; +use crate::ast::{FunctionType, Ident, Type}; #[derive(Debug, Clone)] pub struct Function<'a> { pub type_: FunctionType, pub args: Vec>, - pub body: Expr<'a>, + pub body: Expr<'a, Type>, } #[derive(From, TryInto)] diff --git a/src/main.rs b/src/main.rs index b539ebbb3d99..d5b00d6b6c46 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,7 @@ pub mod compiler; pub mod interpreter; #[macro_use] pub mod parser; +pub mod tc; pub use common::{Error, Result}; @@ -21,6 +22,7 @@ struct Opts { enum Command { Eval(commands::Eval), Compile(commands::Compile), + Check(commands::Check), } fn main() -> anyhow::Result<()> { @@ -28,5 +30,6 @@ fn main() -> anyhow::Result<()> { match opts.subcommand { Command::Eval(eval) => Ok(eval.run()?), Command::Compile(compile) => Ok(compile.run()?), + Command::Check(check) => Ok(check.run()?), } } diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 2fda3e93fae9..73c873b5b304 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -156,7 +156,14 @@ where named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); -named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); +named!(bool_(&str) -> Literal, alt!( + tag!("true") => { |_| Literal::Bool(true) } | + tag!("false") => { |_| Literal::Bool(false) } +)); + +named!(literal(&str) -> Literal, alt!(int | bool_)); + +named!(literal_expr(&str) -> Expr, map!(literal, Expr::Literal)); named!(binding(&str) -> Binding, do_parse!( multispace0 @@ -262,7 +269,7 @@ named!(fun_expr(&str) -> Expr, do_parse!( named!(arg(&str) -> Expr, alt!( ident_expr | - literal | + literal_expr | paren_expr )); @@ -280,7 +287,7 @@ named!(simple_expr_unascripted(&str) -> Expr, alt!( let_ | if_ | fun_expr | - literal | + literal_expr | ident_expr )); @@ -399,6 +406,18 @@ pub(crate) mod tests { } } + #[test] + fn bools() { + assert_eq!( + test_parse!(expr, "true"), + Expr::Literal(Literal::Bool(true)) + ); + assert_eq!( + test_parse!(expr, "false"), + Expr::Literal(Literal::Bool(false)) + ); + } + #[test] fn let_complex() { let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); diff --git a/src/parser/macros.rs b/src/parser/macros.rs index 60db5133dc0f..406e5c0e699e 100644 --- a/src/parser/macros.rs +++ b/src/parser/macros.rs @@ -1,3 +1,4 @@ +#[cfg(test)] #[macro_use] macro_rules! test_parse { ($parser: ident, $src: expr) => {{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index af7dff6ff213..0251d02df464 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14,7 +14,10 @@ pub use type_::type_; pub type Error = nom::Err>; pub(crate) fn is_reserved(s: &str) -> bool { - matches!(s, "if" | "then" | "else" | "let" | "in" | "fn") + matches!( + s, + "if" | "then" | "else" | "let" | "in" | "fn" | "int" | "float" | "bool" | "true" | "false" + ) } pub(crate) fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> diff --git a/src/tc/mod.rs b/src/tc/mod.rs new file mode 100644 index 000000000000..b5acfac2b426 --- /dev/null +++ b/src/tc/mod.rs @@ -0,0 +1,528 @@ +use derive_more::From; +use itertools::Itertools; +use std::collections::HashMap; +use std::convert::{TryFrom, TryInto}; +use std::fmt::{self, Display}; +use std::result; +use thiserror::Error; + +use crate::ast::{self, hir, BinaryOperator, Ident, Literal}; +use crate::common::env::Env; + +#[derive(Debug, Error)] +pub enum Error { + #[error("Undefined variable {0}")] + UndefinedVariable(Ident<'static>), + + #[error("Mismatched types: expected {expected}, but got {actual}")] + TypeMismatch { expected: Type, actual: Type }, + + #[error("Mismatched types, expected numeric type, but got {0}")] + NonNumeric(Type), + + #[error("Ambiguous type {0}")] + AmbiguousType(TyVar), +} + +pub type Result = result::Result; + +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub struct TyVar(u64); + +impl Display for TyVar { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "t{}", self.0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub struct NullaryType(String); + +impl Display for NullaryType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(&self.0) + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum PrimType { + Int, + Float, + Bool, +} + +impl From for ast::Type { + fn from(pr: PrimType) -> Self { + match pr { + PrimType::Int => ast::Type::Int, + PrimType::Float => ast::Type::Float, + PrimType::Bool => ast::Type::Bool, + } + } +} + +impl Display for PrimType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PrimType::Int => f.write_str("int"), + PrimType::Float => f.write_str("float"), + PrimType::Bool => f.write_str("bool"), + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, From)] +pub enum Type { + #[from(ignore)] + Univ(TyVar), + #[from(ignore)] + Exist(TyVar), + Nullary(NullaryType), + Prim(PrimType), + Fun { + args: Vec, + ret: Box, + }, +} + +impl PartialEq for Type { + fn eq(&self, other: &ast::Type) -> bool { + match (self, other) { + (Type::Univ(_), _) => todo!(), + (Type::Exist(_), _) => false, + (Type::Nullary(_), _) => todo!(), + (Type::Prim(pr), ty) => ast::Type::from(*pr) == *ty, + (Type::Fun { args, ret }, ast::Type::Function(ft)) => { + *args == ft.args && (**ret).eq(&*ft.ret) + } + (Type::Fun { .. }, _) => false, + } + } +} + +impl TryFrom for ast::Type { + type Error = Type; + + fn try_from(value: Type) -> result::Result { + match value { + Type::Univ(_) => todo!(), + Type::Exist(_) => Err(value), + Type::Nullary(_) => todo!(), + Type::Prim(p) => Ok(p.into()), + Type::Fun { ref args, ref ret } => Ok(ast::Type::Function(ast::FunctionType { + args: args + .clone() + .into_iter() + .map(Self::try_from) + .try_collect() + .map_err(|_| value.clone())?, + ret: Box::new((*ret.clone()).try_into().map_err(|_| value.clone())?), + })), + } + } +} + +const INT: Type = Type::Prim(PrimType::Int); +const FLOAT: Type = Type::Prim(PrimType::Float); +const BOOL: Type = Type::Prim(PrimType::Bool); + +impl Display for Type { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Type::Nullary(nt) => nt.fmt(f), + Type::Prim(p) => p.fmt(f), + Type::Univ(TyVar(n)) => write!(f, "∀{}", n), + Type::Exist(TyVar(n)) => write!(f, "∃{}", n), + Type::Fun { args, ret } => write!(f, "fn {} -> {}", args.iter().join(", "), ret), + } + } +} + +impl From for Type { + fn from(type_: ast::Type) -> Self { + match type_ { + ast::Type::Int => INT, + ast::Type::Float => FLOAT, + ast::Type::Bool => BOOL, + ast::Type::Function(ast::FunctionType { args, ret }) => Type::Fun { + args: args.into_iter().map(Self::from).collect(), + ret: Box::new(Self::from(*ret)), + }, + } + } +} + +struct Typechecker<'ast> { + ty_var_counter: u64, + ctx: HashMap, + env: Env, Type>, +} + +impl<'ast> Typechecker<'ast> { + fn new() -> Self { + Self { + ty_var_counter: 0, + ctx: Default::default(), + env: Default::default(), + } + } + + pub(crate) fn tc_expr(&mut self, expr: ast::Expr<'ast>) -> Result> { + match expr { + ast::Expr::Ident(ident) => { + let type_ = self + .env + .resolve(&ident) + .ok_or_else(|| Error::UndefinedVariable(ident.to_owned()))? + .clone(); + Ok(hir::Expr::Ident(ident, type_)) + } + ast::Expr::Literal(lit) => { + let type_ = match lit { + Literal::Int(_) => Type::Prim(PrimType::Int), + Literal::Bool(_) => Type::Prim(PrimType::Bool), + }; + Ok(hir::Expr::Literal(lit, type_)) + } + ast::Expr::UnaryOp { op, rhs } => todo!(), + ast::Expr::BinaryOp { lhs, op, rhs } => { + let lhs = self.tc_expr(*lhs)?; + let rhs = self.tc_expr(*rhs)?; + let type_ = match op { + BinaryOperator::Equ | BinaryOperator::Neq => { + self.unify(lhs.type_(), rhs.type_())?; + Type::Prim(PrimType::Bool) + } + BinaryOperator::Add | BinaryOperator::Sub | BinaryOperator::Mul => { + let ty = self.unify(lhs.type_(), rhs.type_())?; + // if !matches!(ty, Type::Int | Type::Float) { + // return Err(Error::NonNumeric(ty)); + // } + ty + } + BinaryOperator::Div => todo!(), + BinaryOperator::Pow => todo!(), + }; + Ok(hir::Expr::BinaryOp { + lhs: Box::new(lhs), + op, + rhs: Box::new(rhs), + type_, + }) + } + ast::Expr::Let { bindings, body } => { + self.env.push(); + let bindings = bindings + .into_iter() + .map( + |ast::Binding { ident, type_, body }| -> Result> { + let body = self.tc_expr(body)?; + if let Some(type_) = type_ { + self.unify(body.type_(), &type_.into())?; + } + self.env.set(ident.clone(), body.type_().clone()); + Ok(hir::Binding { + ident, + type_: body.type_().clone(), + body, + }) + }, + ) + .collect::>>>()?; + let body = self.tc_expr(*body)?; + self.env.pop(); + Ok(hir::Expr::Let { + bindings, + type_: body.type_().clone(), + body: Box::new(body), + }) + } + ast::Expr::If { + condition, + then, + else_, + } => { + let condition = self.tc_expr(*condition)?; + self.unify(&Type::Prim(PrimType::Bool), condition.type_())?; + let then = self.tc_expr(*then)?; + let else_ = self.tc_expr(*else_)?; + let type_ = self.unify(then.type_(), else_.type_())?; + Ok(hir::Expr::If { + condition: Box::new(condition), + then: Box::new(then), + else_: Box::new(else_), + type_, + }) + } + ast::Expr::Fun(f) => { + let ast::Fun { args, body } = *f; + self.env.push(); + let args: Vec<_> = args + .into_iter() + .map(|id| { + let ty = self.fresh_ex(); + self.env.set(id.clone(), ty.clone()); + (id, ty) + }) + .collect(); + let body = self.tc_expr(body)?; + self.env.pop(); + Ok(hir::Expr::Fun { + type_: Type::Fun { + args: args.iter().map(|(_, ty)| ty.clone()).collect(), + ret: Box::new(body.type_().clone()), + }, + args, + body: Box::new(body), + }) + } + ast::Expr::Call { fun, args } => { + let ret_ty = self.fresh_ex(); + let arg_tys = args.iter().map(|_| self.fresh_ex()).collect::>(); + let ft = Type::Fun { + args: arg_tys.clone(), + ret: Box::new(ret_ty.clone()), + }; + let fun = self.tc_expr(*fun)?; + self.unify(&ft, fun.type_())?; + let args = args + .into_iter() + .zip(arg_tys) + .map(|(arg, ty)| { + let arg = self.tc_expr(arg)?; + self.unify(&ty, arg.type_())?; + Ok(arg) + }) + .try_collect()?; + Ok(hir::Expr::Call { + fun: Box::new(fun), + args, + type_: ret_ty, + }) + } + ast::Expr::Ascription { expr, type_ } => { + let expr = self.tc_expr(*expr)?; + self.unify(expr.type_(), &type_.into())?; + Ok(expr) + } + } + } + + pub(crate) fn tc_decl(&mut self, decl: ast::Decl<'ast>) -> Result> { + match decl { + ast::Decl::Fun { name, body } => { + let body = self.tc_expr(ast::Expr::Fun(Box::new(body)))?; + let type_ = body.type_().clone(); + self.env.set(name.clone(), type_); + match body { + hir::Expr::Fun { args, body, type_ } => Ok(hir::Decl::Fun { + name, + args, + body, + type_, + }), + _ => unreachable!(), + } + } + } + } + + fn fresh_tv(&mut self) -> TyVar { + self.ty_var_counter += 1; + TyVar(self.ty_var_counter) + } + + fn fresh_ex(&mut self) -> Type { + Type::Exist(self.fresh_tv()) + } + + fn fresh_univ(&mut self) -> Type { + Type::Exist(self.fresh_tv()) + } + + fn universalize<'a>(&mut self, expr: hir::Expr<'a, Type>) -> hir::Expr<'a, Type> { + // TODO + expr + } + + fn unify(&mut self, ty1: &Type, ty2: &Type) -> Result { + match (ty1, ty2) { + (Type::Prim(p1), Type::Prim(p2)) if p1 == p2 => Ok(ty2.clone()), + (Type::Exist(tv), ty) | (ty, Type::Exist(tv)) => match self.resolve_tv(*tv) { + Some(existing_ty) if *ty == existing_ty => Ok(ty.clone()), + Some(existing_ty) => Err(Error::TypeMismatch { + expected: ty.clone(), + actual: existing_ty.into(), + }), + None => match self.ctx.insert(*tv, ty.clone()) { + Some(existing) => self.unify(&existing, ty), + None => Ok(ty.clone()), + }, + }, + (Type::Univ(u1), Type::Univ(u2)) if u1 == u2 => Ok(ty2.clone()), + ( + Type::Fun { + args: args1, + ret: ret1, + }, + Type::Fun { + args: args2, + ret: ret2, + }, + ) => { + let args = args1 + .iter() + .zip(args2) + .map(|(t1, t2)| self.unify(t1, t2)) + .try_collect()?; + let ret = self.unify(ret1, ret2)?; + Ok(Type::Fun { + args, + ret: Box::new(ret), + }) + } + (Type::Nullary(_), _) | (_, Type::Nullary(_)) => todo!(), + _ => Err(Error::TypeMismatch { + expected: ty1.clone(), + actual: ty2.clone(), + }), + } + } + + fn finalize_expr(&self, expr: hir::Expr<'ast, Type>) -> Result> { + expr.traverse_type(|ty| self.finalize_type(ty)) + } + + fn finalize_decl(&self, decl: hir::Decl<'ast, Type>) -> Result> { + decl.traverse_type(|ty| self.finalize_type(ty)) + } + + fn finalize_type(&self, ty: Type) -> Result { + match ty { + Type::Exist(tv) => self.resolve_tv(tv).ok_or(Error::AmbiguousType(tv)), + Type::Univ(tv) => todo!(), + Type::Nullary(_) => todo!(), + Type::Prim(pr) => Ok(pr.into()), + Type::Fun { args, ret } => Ok(ast::Type::Function(ast::FunctionType { + args: args + .into_iter() + .map(|ty| self.finalize_type(ty)) + .try_collect()?, + ret: Box::new(self.finalize_type(*ret)?), + })), + } + } + + fn resolve_tv(&self, tv: TyVar) -> Option { + let mut res = &Type::Exist(tv); + loop { + match res { + Type::Exist(tv) => { + res = self.ctx.get(tv)?; + } + Type::Univ(_) => todo!(), + Type::Nullary(_) => todo!(), + Type::Prim(pr) => break Some((*pr).into()), + Type::Fun { args, ret } => todo!(), + } + } + } +} + +pub fn typecheck_expr(expr: ast::Expr) -> Result> { + let mut typechecker = Typechecker::new(); + let typechecked = typechecker.tc_expr(expr)?; + typechecker.finalize_expr(typechecked) +} + +pub fn typecheck_toplevel(decls: Vec) -> Result>> { + let mut typechecker = Typechecker::new(); + decls + .into_iter() + .map(|decl| { + let decl = typechecker.tc_decl(decl)?; + typechecker.finalize_decl(decl) + }) + .try_collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! assert_type { + ($expr: expr, $type: expr) => { + use crate::parser::{expr, type_}; + let parsed_expr = test_parse!(expr, $expr); + let parsed_type = test_parse!(type_, $type); + let res = typecheck_expr(parsed_expr).unwrap_or_else(|e| panic!("{}", e)); + assert_eq!(res.type_(), &parsed_type); + }; + } + + macro_rules! assert_type_error { + ($expr: expr) => { + use crate::parser::expr; + let parsed_expr = test_parse!(expr, $expr); + let res = typecheck_expr(parsed_expr); + assert!( + res.is_err(), + "Expected type error, but got type: {}", + res.unwrap().type_() + ); + }; + } + + #[test] + fn literal_int() { + assert_type!("1", "int"); + } + + #[test] + fn conditional() { + assert_type!("if 1 == 2 then 3 else 4", "int"); + } + + #[test] + #[ignore] + fn add_bools() { + assert_type_error!("true + false"); + } + + #[test] + fn call_generic_function() { + assert_type!("(fn x = x) 1", "int"); + } + + #[test] + #[ignore] + fn generic_function() { + assert_type!("fn x = x", "fn x, y -> x"); + } + + #[test] + #[ignore] + fn let_generalization() { + assert_type!("let id = fn x = x in if id true then id 1 else 2", "int"); + } + + #[test] + fn concrete_function() { + assert_type!("fn x = x + 1", "fn int -> int"); + } + + #[test] + fn call_concrete_function() { + assert_type!("(fn x = x + 1) 2", "int"); + } + + #[test] + fn conditional_non_bool() { + assert_type_error!("if 3 then true else false"); + } + + #[test] + fn let_int() { + assert_type!("let x = 1 in x", "int"); + } +} -- cgit 1.4.1 From 39656a3801bb311edd9ebb65e92a24fc48f69ec7 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sun, 14 Mar 2021 11:53:13 -0400 Subject: Add string support to the frontend --- src/ast/hir.rs | 4 ++-- src/ast/mod.rs | 19 ++++++++++++++++--- src/codegen/llvm.rs | 1 + src/interpreter/mod.rs | 1 + src/interpreter/value.rs | 23 +++++++++++++++++++++++ src/main.rs | 2 ++ src/parser/expr.rs | 34 ++++++++++++++++++++++++++++++---- src/parser/mod.rs | 12 +++++++++++- src/parser/type_.rs | 2 ++ src/tc/mod.rs | 8 +++++++- 10 files changed, 95 insertions(+), 11 deletions(-) diff --git a/src/ast/hir.rs b/src/ast/hir.rs index 151ddd529872..9db6919f6f53 100644 --- a/src/ast/hir.rs +++ b/src/ast/hir.rs @@ -26,7 +26,7 @@ impl<'a, T> Binding<'a, T> { pub enum Expr<'a, T> { Ident(Ident<'a>, T), - Literal(Literal, T), + Literal(Literal<'a>, T), UnaryOp { op: UnaryOperator, @@ -158,7 +158,7 @@ impl<'a, T> Expr<'a, T> { { match self { Expr::Ident(id, t) => Expr::Ident(id.to_owned(), t.clone()), - Expr::Literal(lit, t) => Expr::Literal(lit.clone(), t.clone()), + Expr::Literal(lit, t) => Expr::Literal(lit.to_owned(), t.clone()), Expr::UnaryOp { op, rhs, type_ } => Expr::UnaryOp { op: *op, rhs: Box::new((**rhs).to_owned()), diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cef366d16e04..5526c5348350 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -107,9 +107,20 @@ pub enum UnaryOperator { } #[derive(Debug, PartialEq, Eq, Clone)] -pub enum Literal { +pub enum Literal<'a> { Int(u64), Bool(bool), + String(Cow<'a, str>), +} + +impl<'a> Literal<'a> { + pub fn to_owned(&self) -> Literal<'static> { + match self { + Literal::Int(i) => Literal::Int(*i), + Literal::Bool(b) => Literal::Bool(*b), + Literal::String(s) => Literal::String(Cow::Owned(s.clone().into_owned())), + } + } } #[derive(Debug, PartialEq, Eq, Clone)] @@ -133,7 +144,7 @@ impl<'a> Binding<'a> { pub enum Expr<'a> { Ident(Ident<'a>), - Literal(Literal), + Literal(Literal<'a>), UnaryOp { op: UnaryOperator, @@ -174,7 +185,7 @@ impl<'a> Expr<'a> { pub fn to_owned(&self) -> Expr<'static> { match self { Expr::Ident(ref id) => Expr::Ident(id.to_owned()), - Expr::Literal(ref lit) => Expr::Literal(lit.clone()), + Expr::Literal(ref lit) => Expr::Literal(lit.to_owned()), Expr::UnaryOp { op, rhs } => Expr::UnaryOp { op: *op, rhs: Box::new((**rhs).to_owned()), @@ -247,6 +258,7 @@ pub enum Type { Int, Float, Bool, + CString, Function(FunctionType), } @@ -256,6 +268,7 @@ impl Display for Type { Type::Int => f.write_str("int"), Type::Float => f.write_str("float"), Type::Bool => f.write_str("bool"), + Type::CString => f.write_str("cstring"), Type::Function(ft) => ft.fmt(f), } } diff --git a/src/codegen/llvm.rs b/src/codegen/llvm.rs index 5b5db90a1ad8..ee087845b640 100644 --- a/src/codegen/llvm.rs +++ b/src/codegen/llvm.rs @@ -92,6 +92,7 @@ impl<'ctx, 'ast> Codegen<'ctx, 'ast> { Literal::Bool(b) => Ok(AnyValueEnum::IntValue( ty.const_int(if *b { 1 } else { 0 }, false), )), + Literal::String(_) => todo!(), } } Expr::UnaryOp { op, rhs, .. } => { diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index 85a8928cbf9a..d414dedf8560 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -29,6 +29,7 @@ impl<'a> Interpreter<'a> { Expr::Ident(id, _) => self.resolve(id), Expr::Literal(Literal::Int(i), _) => Ok((*i).into()), Expr::Literal(Literal::Bool(b), _) => Ok((*b).into()), + Expr::Literal(Literal::String(s), _) => Ok(s.clone().into()), Expr::UnaryOp { op, rhs, .. } => { let rhs = self.eval(rhs)?; match op { diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index 5e55825160cd..a1a579aec8db 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -1,7 +1,9 @@ +use std::borrow::Cow; use std::convert::TryFrom; use std::fmt::{self, Display}; use std::ops::{Add, Div, Mul, Neg, Sub}; use std::rc::Rc; +use std::result; use derive_more::{Deref, From, TryInto}; @@ -22,15 +24,28 @@ pub enum Val<'a> { Int(i64), Float(f64), Bool(bool), + String(Cow<'a, str>), Function(Function<'a>), } +impl<'a> TryFrom> for String { + type Error = (); + + fn try_from(value: Val<'a>) -> result::Result { + match value { + Val::String(s) => Ok(s.into_owned()), + _ => Err(()), + } + } +} + impl<'a> fmt::Debug for Val<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Val::Int(x) => f.debug_tuple("Int").field(x).finish(), Val::Float(x) => f.debug_tuple("Float").field(x).finish(), Val::Bool(x) => f.debug_tuple("Bool").field(x).finish(), + Val::String(s) => f.debug_tuple("String").field(s).finish(), Val::Function(Function { type_, .. }) => { f.debug_struct("Function").field("type_", type_).finish() } @@ -62,6 +77,7 @@ impl<'a> Display for Val<'a> { Val::Int(x) => x.fmt(f), Val::Float(x) => x.fmt(f), Val::Bool(x) => x.fmt(f), + Val::String(s) => write!(f, "{:?}", s), Val::Function(Function { type_, .. }) => write!(f, "<{}>", type_), } } @@ -73,6 +89,7 @@ impl<'a> Val<'a> { Val::Int(_) => Type::Int, Val::Float(_) => Type::Float, Val::Bool(_) => Type::Bool, + Val::String(_) => Type::CString, Val::Function(Function { type_, .. }) => Type::Function(type_.clone()), } } @@ -178,3 +195,9 @@ impl TypeOf for f64 { Type::Float } } + +impl TypeOf for String { + fn type_of() -> Type { + Type::CString + } +} diff --git a/src/main.rs b/src/main.rs index d5b00d6b6c46..d476b96ed634 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![feature(str_split_once)] + use clap::Clap; pub mod ast; diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 73c873b5b304..2ec6d60cd04f 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -1,11 +1,14 @@ +use std::borrow::Cow; + +use nom::alt; use nom::character::complete::{digit1, multispace0, multispace1}; use nom::{ - alt, call, char, complete, delimited, do_parse, flat_map, many0, map, named, opt, parse_to, + call, char, complete, delimited, do_parse, flat_map, many0, map, named, opt, parse_to, preceded, separated_list0, separated_list1, tag, tuple, }; use pratt::{Affix, Associativity, PrattParser, Precedence}; -use crate::ast::{BinaryOperator, Binding, Expr, Fun, Ident, Literal, UnaryOperator}; +use crate::ast::{BinaryOperator, Binding, Expr, Fun, Literal, UnaryOperator}; use crate::parser::{ident, type_}; #[derive(Debug)] @@ -161,7 +164,22 @@ named!(bool_(&str) -> Literal, alt!( tag!("false") => { |_| Literal::Bool(false) } )); -named!(literal(&str) -> Literal, alt!(int | bool_)); +fn string_internal(i: &str) -> nom::IResult<&str, Cow<'_, str>, nom::error::Error<&str>> { + let (s, rem) = i + .split_once('"') + .ok_or_else(|| nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Tag)))?; + Ok((rem, Cow::Borrowed(s))) +} + +named!(string(&str) -> Literal, preceded!( + char!('"'), + map!( + string_internal, + |s| Literal::String(s) + ) +)); + +named!(literal(&str) -> Literal, alt!(int | bool_ | string)); named!(literal_expr(&str) -> Expr, map!(literal, Expr::Literal)); @@ -308,7 +326,7 @@ named!(pub expr(&str) -> Expr, alt!( #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::ast::Type; + use crate::ast::{Ident, Type}; use std::convert::TryFrom; use BinaryOperator::*; use Expr::{BinaryOp, If, Let, UnaryOp}; @@ -418,6 +436,14 @@ pub(crate) mod tests { ); } + #[test] + fn simple_string_lit() { + assert_eq!( + test_parse!(expr, "\"foobar\""), + Expr::Literal(Literal::String(Cow::Borrowed("foobar"))) + ) + } + #[test] fn let_complex() { let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0251d02df464..9ac590cee86c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,7 +16,17 @@ pub type Error = nom::Err>; pub(crate) fn is_reserved(s: &str) -> bool { matches!( s, - "if" | "then" | "else" | "let" | "in" | "fn" | "int" | "float" | "bool" | "true" | "false" + "if" | "then" + | "else" + | "let" + | "in" + | "fn" + | "int" + | "float" + | "bool" + | "true" + | "false" + | "cstring" ) } diff --git a/src/parser/type_.rs b/src/parser/type_.rs index 076df7d6bd55..66b4f9f72c23 100644 --- a/src/parser/type_.rs +++ b/src/parser/type_.rs @@ -27,6 +27,7 @@ named!(pub type_(&str) -> Type, alt!( tag!("int") => { |_| Type::Int } | tag!("float") => { |_| Type::Float } | tag!("bool") => { |_| Type::Bool } | + tag!("cstring") => { |_| Type::CString } | function_type | delimited!( tuple!(tag!("("), multispace0), @@ -44,6 +45,7 @@ mod tests { assert_eq!(test_parse!(type_, "int"), Type::Int); assert_eq!(test_parse!(type_, "float"), Type::Float); assert_eq!(test_parse!(type_, "bool"), Type::Bool); + assert_eq!(test_parse!(type_, "cstring"), Type::CString); } #[test] diff --git a/src/tc/mod.rs b/src/tc/mod.rs index b5acfac2b426..2c40a02bf7c6 100644 --- a/src/tc/mod.rs +++ b/src/tc/mod.rs @@ -49,6 +49,7 @@ pub enum PrimType { Int, Float, Bool, + CString, } impl From for ast::Type { @@ -57,6 +58,7 @@ impl From for ast::Type { PrimType::Int => ast::Type::Int, PrimType::Float => ast::Type::Float, PrimType::Bool => ast::Type::Bool, + PrimType::CString => ast::Type::CString, } } } @@ -67,6 +69,7 @@ impl Display for PrimType { PrimType::Int => f.write_str("int"), PrimType::Float => f.write_str("float"), PrimType::Bool => f.write_str("bool"), + PrimType::CString => f.write_str("cstring"), } } } @@ -125,6 +128,7 @@ impl TryFrom for ast::Type { const INT: Type = Type::Prim(PrimType::Int); const FLOAT: Type = Type::Prim(PrimType::Float); const BOOL: Type = Type::Prim(PrimType::Bool); +const CSTRING: Type = Type::Prim(PrimType::CString); impl Display for Type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -144,6 +148,7 @@ impl From for Type { ast::Type::Int => INT, ast::Type::Float => FLOAT, ast::Type::Bool => BOOL, + ast::Type::CString => CSTRING, ast::Type::Function(ast::FunctionType { args, ret }) => Type::Fun { args: args.into_iter().map(Self::from).collect(), ret: Box::new(Self::from(*ret)), @@ -181,8 +186,9 @@ impl<'ast> Typechecker<'ast> { let type_ = match lit { Literal::Int(_) => Type::Prim(PrimType::Int), Literal::Bool(_) => Type::Prim(PrimType::Bool), + Literal::String(_) => Type::Prim(PrimType::CString), }; - Ok(hir::Expr::Literal(lit, type_)) + Ok(hir::Expr::Literal(lit.to_owned(), type_)) } ast::Expr::UnaryOp { op, rhs } => todo!(), ast::Expr::BinaryOp { lhs, op, rhs } => { -- cgit 1.4.1 From 7960c3270e1a338f4da40d044a6896df96d82c79 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sun, 14 Mar 2021 12:27:28 -0400 Subject: Make string and bool parsing complete --- src/parser/expr.rs | 6 +++--- src/parser/mod.rs | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 2ec6d60cd04f..fd37fcb9c67c 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -160,8 +160,8 @@ where named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); named!(bool_(&str) -> Literal, alt!( - tag!("true") => { |_| Literal::Bool(true) } | - tag!("false") => { |_| Literal::Bool(false) } + complete!(tag!("true")) => { |_| Literal::Bool(true) } | + complete!(tag!("false")) => { |_| Literal::Bool(false) } )); fn string_internal(i: &str) -> nom::IResult<&str, Cow<'_, str>, nom::error::Error<&str>> { @@ -172,7 +172,7 @@ fn string_internal(i: &str) -> nom::IResult<&str, Cow<'_, str>, nom::error::Erro } named!(string(&str) -> Literal, preceded!( - char!('"'), + complete!(char!('"')), map!( string_internal, |s| Literal::String(s) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9ac590cee86c..9c4598732247 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,6 +1,6 @@ use nom::character::complete::{multispace0, multispace1}; use nom::error::{ErrorKind, ParseError}; -use nom::{alt, char, complete, do_parse, many0, named, separated_list0, tag}; +use nom::{alt, char, complete, do_parse, many0, named, separated_list0, tag, terminated}; #[macro_use] mod macros; @@ -81,7 +81,7 @@ named!(pub decl(&str) -> Decl, alt!( fun_decl )); -named!(pub toplevel(&str) -> Vec, many0!(decl)); +named!(pub toplevel(&str) -> Vec, terminated!(many0!(decl), multispace0)); #[cfg(test)] mod tests { @@ -114,5 +114,10 @@ mod tests { fn main = plus (id 2) 7" ); assert_eq!(res.len(), 3); + let res = test_parse!( + toplevel, + "fn id x = x\nfn plus x y = x + y\nfn main = plus (id 2) 7\n" + ); + assert_eq!(res.len(), 3); } } -- cgit 1.4.1 From ecb4c0f803e9b408e4fd21c475769eb4dc649d14 Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sun, 14 Mar 2021 16:43:47 -0400 Subject: Universally quantified type variables Implement universally quantified type variables, both explicitly given by the user and inferred by the type inference algorithm. --- Cargo.lock | 14 +++ Cargo.toml | 4 + ach/functions.ach | 2 +- src/ast/hir.rs | 6 ++ src/ast/mod.rs | 162 +++++++++++++++++++++++++--- src/commands/check.rs | 6 +- src/common/mod.rs | 2 + src/common/namer.rs | 122 +++++++++++++++++++++ src/interpreter/error.rs | 5 +- src/interpreter/mod.rs | 5 +- src/interpreter/value.rs | 20 ++-- src/main.rs | 1 + src/parser/expr.rs | 14 +-- src/parser/mod.rs | 50 ++++++++- src/parser/type_.rs | 19 ++++ src/tc/mod.rs | 274 ++++++++++++++++++++++++++++++++++------------- tests/compile.rs | 41 +++++++ 17 files changed, 635 insertions(+), 112 deletions(-) create mode 100644 src/common/namer.rs create mode 100644 tests/compile.rs diff --git a/Cargo.lock b/Cargo.lock index 8ec5ad6cf952..0c5779135a5f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,7 +5,9 @@ name = "achilles" version = "0.1.0" dependencies = [ "anyhow", + "bimap", "clap", + "crate-root", "derive_more", "inkwell", "itertools", @@ -57,6 +59,12 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +[[package]] +name = "bimap" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f92b72b8f03128773278bf74418b9205f3d2a12c39a61f92395f47af390c32bf" + [[package]] name = "bit-set" version = "0.5.2" @@ -140,6 +148,12 @@ dependencies = [ "syn", ] +[[package]] +name = "crate-root" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59c6fe4622b269032d2c5140a592d67a9c409031d286174fcde172fbed86f0d3" + [[package]] name = "derive_more" version = "0.99.11" diff --git a/Cargo.toml b/Cargo.toml index 2ac7d2540961..c0ba4d137a9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ edition = "2018" [dependencies] anyhow = "1.0.38" +bimap = "0.6.0" clap = "3.0.0-beta.2" derive_more = "0.99.11" inkwell = { git = "https://github.com/TheDan64/inkwell", branch = "master", features = ["llvm11-0"] } @@ -18,3 +19,6 @@ pratt = "0.3.0" proptest = "1.0.0" test-strategy = "0.1.1" thiserror = "1.0.24" + +[dev-dependencies] +crate-root = "0.1.3" diff --git a/ach/functions.ach b/ach/functions.ach index 8d91564c1559..0d2f07eff574 100644 --- a/ach/functions.ach +++ b/ach/functions.ach @@ -1,3 +1,3 @@ fn id x = x -fn plus x y = x + y +fn plus (x: int) (y: int) = x + y fn main = plus (id 2) 7 diff --git a/src/ast/hir.rs b/src/ast/hir.rs index 9db6919f6f53..6859174a2dd0 100644 --- a/src/ast/hir.rs +++ b/src/ast/hir.rs @@ -222,6 +222,12 @@ pub enum Decl<'a, T> { } impl<'a, T> Decl<'a, T> { + pub fn type_(&self) -> &T { + match self { + Decl::Fun { type_, .. } => type_, + } + } + pub fn traverse_type(self, f: F) -> Result, E> where F: Fn(T) -> Result + Clone, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 5526c5348350..1884ba69f43c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1,6 +1,7 @@ pub(crate) mod hir; use std::borrow::Cow; +use std::collections::HashMap; use std::convert::TryFrom; use std::fmt::{self, Display, Formatter}; @@ -126,7 +127,7 @@ impl<'a> Literal<'a> { #[derive(Debug, PartialEq, Eq, Clone)] pub struct Binding<'a> { pub ident: Ident<'a>, - pub type_: Option, + pub type_: Option>, pub body: Expr<'a>, } @@ -134,7 +135,7 @@ impl<'a> Binding<'a> { fn to_owned(&self) -> Binding<'static> { Binding { ident: self.ident.to_owned(), - type_: self.type_.clone(), + type_: self.type_.as_ref().map(|t| t.to_owned()), body: self.body.to_owned(), } } @@ -177,7 +178,7 @@ pub enum Expr<'a> { Ascription { expr: Box>, - type_: Type, + type_: Type<'a>, }, } @@ -215,20 +216,46 @@ impl<'a> Expr<'a> { }, Expr::Ascription { expr, type_ } => Expr::Ascription { expr: Box::new((**expr).to_owned()), - type_: type_.clone(), + type_: type_.to_owned(), }, } } } +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Arg<'a> { + pub ident: Ident<'a>, + pub type_: Option>, +} + +impl<'a> Arg<'a> { + pub fn to_owned(&self) -> Arg<'static> { + Arg { + ident: self.ident.to_owned(), + type_: self.type_.as_ref().map(Type::to_owned), + } + } +} + +impl<'a> TryFrom<&'a str> for Arg<'a> { + type Error = as TryFrom<&'a str>>::Error; + + fn try_from(value: &'a str) -> Result { + Ok(Arg { + ident: Ident::try_from(value)?, + type_: None, + }) + } +} + #[derive(Debug, PartialEq, Eq, Clone)] pub struct Fun<'a> { - pub args: Vec>, + pub args: Vec>, pub body: Expr<'a>, } impl<'a> Fun<'a> { - fn to_owned(&self) -> Fun<'static> { + pub fn to_owned(&self) -> Fun<'static> { Fun { args: self.args.iter().map(|arg| arg.to_owned()).collect(), body: self.body.to_owned(), @@ -236,40 +263,147 @@ impl<'a> Fun<'a> { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, Clone)] pub enum Decl<'a> { Fun { name: Ident<'a>, body: Fun<'a> }, } +//// + #[derive(Debug, PartialEq, Eq, Clone)] -pub struct FunctionType { - pub args: Vec, - pub ret: Box, +pub struct FunctionType<'a> { + pub args: Vec>, + pub ret: Box>, } -impl Display for FunctionType { +impl<'a> FunctionType<'a> { + pub fn to_owned(&self) -> FunctionType<'static> { + FunctionType { + args: self.args.iter().map(|a| a.to_owned()).collect(), + ret: Box::new((*self.ret).to_owned()), + } + } +} + +impl<'a> Display for FunctionType<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!(f, "fn {} -> {}", self.args.iter().join(", "), self.ret) } } #[derive(Debug, PartialEq, Eq, Clone)] -pub enum Type { +pub enum Type<'a> { Int, Float, Bool, CString, - Function(FunctionType), + Var(Ident<'a>), + Function(FunctionType<'a>), } -impl Display for Type { +impl<'a> Type<'a> { + pub fn to_owned(&self) -> Type<'static> { + match self { + Type::Int => Type::Int, + Type::Float => Type::Float, + Type::Bool => Type::Bool, + Type::CString => Type::CString, + Type::Var(v) => Type::Var(v.to_owned()), + Type::Function(f) => Type::Function(f.to_owned()), + } + } + + pub fn alpha_equiv(&self, other: &Self) -> bool { + fn do_alpha_equiv<'a>( + substs: &mut HashMap<&'a Ident<'a>, &'a Ident<'a>>, + lhs: &'a Type, + rhs: &'a Type, + ) -> bool { + match (lhs, rhs) { + (Type::Var(v1), Type::Var(v2)) => substs.entry(v1).or_insert(v2) == &v2, + ( + Type::Function(FunctionType { + args: args1, + ret: ret1, + }), + Type::Function(FunctionType { + args: args2, + ret: ret2, + }), + ) => { + args1.len() == args2.len() + && args1 + .iter() + .zip(args2) + .all(|(a1, a2)| do_alpha_equiv(substs, a1, a2)) + && do_alpha_equiv(substs, ret1, ret2) + } + _ => lhs == rhs, + } + } + + let mut substs = HashMap::new(); + do_alpha_equiv(&mut substs, self, other) + } +} + +impl<'a> Display for Type<'a> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Type::Int => f.write_str("int"), Type::Float => f.write_str("float"), Type::Bool => f.write_str("bool"), Type::CString => f.write_str("cstring"), + Type::Var(v) => v.fmt(f), Type::Function(ft) => ft.fmt(f), } } } + +#[cfg(test)] +mod tests { + use super::*; + + fn type_var(n: &str) -> Type<'static> { + Type::Var(Ident::try_from(n.to_owned()).unwrap()) + } + + mod alpha_equiv { + use super::*; + + #[test] + fn trivial() { + assert!(Type::Int.alpha_equiv(&Type::Int)); + assert!(!Type::Int.alpha_equiv(&Type::Bool)); + } + + #[test] + fn simple_type_var() { + assert!(type_var("a").alpha_equiv(&type_var("b"))); + } + + #[test] + fn function_with_type_vars_equiv() { + assert!(Type::Function(FunctionType { + args: vec![type_var("a")], + ret: Box::new(type_var("b")), + }) + .alpha_equiv(&Type::Function(FunctionType { + args: vec![type_var("b")], + ret: Box::new(type_var("a")), + }))) + } + + #[test] + fn function_with_type_vars_non_equiv() { + assert!(!Type::Function(FunctionType { + args: vec![type_var("a")], + ret: Box::new(type_var("a")), + }) + .alpha_equiv(&Type::Function(FunctionType { + args: vec![type_var("b")], + ret: Box::new(type_var("a")), + }))) + } + } +} diff --git a/src/commands/check.rs b/src/commands/check.rs index 40de288a282c..0bea482c1478 100644 --- a/src/commands/check.rs +++ b/src/commands/check.rs @@ -15,13 +15,13 @@ pub struct Check { expr: Option, } -fn run_expr(expr: String) -> Result { +fn run_expr(expr: String) -> Result> { let (_, parsed) = parser::expr(&expr)?; let hir_expr = tc::typecheck_expr(parsed)?; - Ok(hir_expr.type_().clone()) + Ok(hir_expr.type_().to_owned()) } -fn run_path(path: PathBuf) -> Result { +fn run_path(path: PathBuf) -> Result> { todo!() } diff --git a/src/common/mod.rs b/src/common/mod.rs index af5974a116fb..8368a6dd180f 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,4 +1,6 @@ pub(crate) mod env; pub(crate) mod error; +pub(crate) mod namer; pub use error::{Error, Result}; +pub use namer::{Namer, NamerOf}; diff --git a/src/common/namer.rs b/src/common/namer.rs new file mode 100644 index 000000000000..016e9f6ed99a --- /dev/null +++ b/src/common/namer.rs @@ -0,0 +1,122 @@ +use std::fmt::Display; +use std::marker::PhantomData; + +pub struct Namer { + make_name: F, + counter: u64, + _phantom: PhantomData, +} + +impl Namer { + pub fn new(make_name: F) -> Self { + Namer { + make_name, + counter: 0, + _phantom: PhantomData, + } + } +} + +impl Namer String>> { + pub fn with_prefix(prefix: T) -> Self + where + T: Display + 'static, + { + Namer::new(move |i| format!("{}{}", prefix, i)).boxed() + } + + pub fn with_suffix(suffix: T) -> Self + where + T: Display + 'static, + { + Namer::new(move |i| format!("{}{}", i, suffix)).boxed() + } + + pub fn alphabetic() -> Self { + Namer::new(|i| { + if i <= 26 { + std::char::from_u32((i + 96) as u32).unwrap().to_string() + } else { + format!( + "{}{}", + std::char::from_u32(((i % 26) + 96) as u32).unwrap(), + i - 26 + ) + } + }) + .boxed() + } +} + +impl Namer +where + F: Fn(u64) -> T, +{ + pub fn make_name(&mut self) -> T { + self.counter += 1; + (self.make_name)(self.counter) + } + + pub fn boxed(self) -> NamerOf + where + F: 'static, + { + Namer { + make_name: Box::new(self.make_name), + counter: self.counter, + _phantom: self._phantom, + } + } + + pub fn map(self, f: G) -> NamerOf + where + G: Fn(T) -> U + 'static, + T: 'static, + F: 'static, + { + Namer { + counter: self.counter, + make_name: Box::new(move |x| f((self.make_name)(x))), + _phantom: PhantomData, + } + } +} + +pub type NamerOf = Namer T>>; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn prefix() { + let mut namer = Namer::with_prefix("t"); + assert_eq!(namer.make_name(), "t1"); + assert_eq!(namer.make_name(), "t2"); + } + + #[test] + fn suffix() { + let mut namer = Namer::with_suffix("t"); + assert_eq!(namer.make_name(), "1t"); + assert_eq!(namer.make_name(), "2t"); + } + + #[test] + fn alphabetic() { + let mut namer = Namer::alphabetic(); + assert_eq!(namer.make_name(), "a"); + assert_eq!(namer.make_name(), "b"); + (0..25).for_each(|_| { + namer.make_name(); + }); + assert_eq!(namer.make_name(), "b2"); + } + + #[test] + fn custom_callback() { + let mut namer = Namer::new(|n| n + 1); + assert_eq!(namer.make_name(), 2); + assert_eq!(namer.make_name(), 3); + } +} diff --git a/src/interpreter/error.rs b/src/interpreter/error.rs index e0299d180553..268d6f479a1e 100644 --- a/src/interpreter/error.rs +++ b/src/interpreter/error.rs @@ -10,7 +10,10 @@ pub enum Error { UndefinedVariable(Ident<'static>), #[error("Unexpected type {actual}, expected type {expected}")] - InvalidType { actual: Type, expected: Type }, + InvalidType { + actual: Type<'static>, + expected: Type<'static>, + }, } pub type Result = result::Result; diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index d414dedf8560..3bfeeb52e85c 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -115,7 +115,7 @@ impl<'a> Interpreter<'a> { } } -pub fn eval<'a>(expr: &'a Expr<'a, Type>) -> Result { +pub fn eval<'a>(expr: &'a Expr<'a, Type>) -> Result> { let mut interpreter = Interpreter::new(); interpreter.eval(expr) } @@ -128,7 +128,7 @@ mod tests { use super::*; use BinaryOperator::*; - fn int_lit(i: u64) -> Box> { + fn int_lit(i: u64) -> Box>> { Box::new(Expr::Literal(Literal::Int(i), Type::Int)) } @@ -168,6 +168,7 @@ mod tests { } #[test] + #[ignore] fn function_call() { let res = do_eval::("let id = fn x = x in id 1"); assert_eq!(res, 1); diff --git a/src/interpreter/value.rs b/src/interpreter/value.rs index a1a579aec8db..55ba42f9de58 100644 --- a/src/interpreter/value.rs +++ b/src/interpreter/value.rs @@ -13,9 +13,9 @@ use crate::ast::{FunctionType, Ident, Type}; #[derive(Debug, Clone)] pub struct Function<'a> { - pub type_: FunctionType, + pub type_: FunctionType<'a>, pub args: Vec>, - pub body: Expr<'a, Type>, + pub body: Expr<'a, Type<'a>>, } #[derive(From, TryInto)] @@ -100,7 +100,7 @@ impl<'a> Val<'a> { &'b T: TryFrom<&'b Self>, { <&T>::try_from(self).map_err(|_| Error::InvalidType { - actual: self.type_(), + actual: self.type_().to_owned(), expected: ::type_of(), }) } @@ -109,8 +109,8 @@ impl<'a> Val<'a> { match self { Val::Function(f) if f.type_ == function_type => Ok(&f), _ => Err(Error::InvalidType { - actual: self.type_(), - expected: Type::Function(function_type), + actual: self.type_().to_owned(), + expected: Type::Function(function_type.to_owned()), }), } } @@ -175,29 +175,29 @@ impl<'a> Div for Value<'a> { } pub trait TypeOf { - fn type_of() -> Type; + fn type_of() -> Type<'static>; } impl TypeOf for i64 { - fn type_of() -> Type { + fn type_of() -> Type<'static> { Type::Int } } impl TypeOf for bool { - fn type_of() -> Type { + fn type_of() -> Type<'static> { Type::Bool } } impl TypeOf for f64 { - fn type_of() -> Type { + fn type_of() -> Type<'static> { Type::Float } } impl TypeOf for String { - fn type_of() -> Type { + fn type_of() -> Type<'static> { Type::CString } } diff --git a/src/main.rs b/src/main.rs index d476b96ed634..4ba0aaf33e91 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ #![feature(str_split_once)] +#![feature(or_insert_with_key)] use clap::Clap; diff --git a/src/parser/expr.rs b/src/parser/expr.rs index fd37fcb9c67c..12c55df02b80 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -9,7 +9,7 @@ use nom::{ use pratt::{Affix, Associativity, PrattParser, Precedence}; use crate::ast::{BinaryOperator, Binding, Expr, Fun, Literal, UnaryOperator}; -use crate::parser::{ident, type_}; +use crate::parser::{arg, ident, type_}; #[derive(Debug)] enum TokenTree<'a> { @@ -274,7 +274,7 @@ named!(no_arg_call(&str) -> Expr, do_parse!( named!(fun_expr(&str) -> Expr, do_parse!( tag!("fn") >> multispace1 - >> args: separated_list0!(multispace1, ident) + >> args: separated_list0!(multispace1, arg) >> multispace0 >> char!('=') >> multispace0 @@ -285,7 +285,7 @@ named!(fun_expr(&str) -> Expr, do_parse!( }))) )); -named!(arg(&str) -> Expr, alt!( +named!(fn_arg(&str) -> Expr, alt!( ident_expr | literal_expr | paren_expr @@ -294,7 +294,7 @@ named!(arg(&str) -> Expr, alt!( named!(call_with_args(&str) -> Expr, do_parse!( fun: funcref >> multispace1 - >> args: separated_list1!(multispace1, arg) + >> args: separated_list1!(multispace1, fn_arg) >> (Expr::Call { fun: Box::new(fun), args @@ -326,7 +326,7 @@ named!(pub expr(&str) -> Expr, alt!( #[cfg(test)] pub(crate) mod tests { use super::*; - use crate::ast::{Ident, Type}; + use crate::ast::{Arg, Ident, Type}; use std::convert::TryFrom; use BinaryOperator::*; use Expr::{BinaryOp, If, Let, UnaryOp}; @@ -549,7 +549,7 @@ pub(crate) mod tests { ident: Ident::try_from("id").unwrap(), type_: None, body: Expr::Fun(Box::new(Fun { - args: vec![Ident::try_from("x").unwrap()], + args: vec![Arg::try_from("x").unwrap()], body: *ident_expr("x") })) }], @@ -586,7 +586,7 @@ pub(crate) mod tests { ident: Ident::try_from("const_1").unwrap(), type_: None, body: Expr::Fun(Box::new(Fun { - args: vec![Ident::try_from("x").unwrap()], + args: vec![Arg::try_from("x").unwrap()], body: Expr::Ascription { expr: Box::new(Expr::Literal(Literal::Int(1))), type_: Type::Int, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9c4598732247..8599ccabfc23 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,7 +7,7 @@ mod macros; mod expr; mod type_; -use crate::ast::{Decl, Fun, Ident}; +use crate::ast::{Arg, Decl, Fun, Ident}; pub use expr::expr; pub use type_::type_; @@ -58,12 +58,33 @@ where } } +named!(ascripted_arg(&str) -> Arg, do_parse!( + complete!(char!('(')) >> + multispace0 >> + ident: ident >> + multispace0 >> + complete!(char!(':')) >> + multispace0 >> + type_: type_ >> + multispace0 >> + complete!(char!(')')) >> + (Arg { + ident, + type_: Some(type_) + }) +)); + +named!(arg(&str) -> Arg, alt!( + ident => { |ident| Arg {ident, type_: None}} | + ascripted_arg +)); + named!(fun_decl(&str) -> Decl, do_parse!( complete!(tag!("fn")) >> multispace0 >> name: ident >> multispace1 - >> args: separated_list0!(multispace1, ident) + >> args: separated_list0!(multispace1, arg) >> multispace0 >> char!('=') >> multispace0 @@ -87,6 +108,8 @@ named!(pub toplevel(&str) -> Vec, terminated!(many0!(decl), multispace0)); mod tests { use std::convert::TryInto; + use crate::ast::{BinaryOperator, Expr, Literal, Type}; + use super::*; use expr::tests::ident_expr; @@ -105,6 +128,29 @@ mod tests { ) } + #[test] + fn ascripted_fn_args() { + test_parse!(ascripted_arg, "(x : int)"); + let res = test_parse!(decl, "fn plus1 (x : int) = x + 1"); + assert_eq!( + res, + Decl::Fun { + name: "plus1".try_into().unwrap(), + body: Fun { + args: vec![Arg { + ident: "x".try_into().unwrap(), + type_: Some(Type::Int), + }], + body: Expr::BinaryOp { + lhs: ident_expr("x"), + op: BinaryOperator::Add, + rhs: Box::new(Expr::Literal(Literal::Int(1))), + } + } + } + ); + } + #[test] fn multiple_decls() { let res = test_parse!( diff --git a/src/parser/type_.rs b/src/parser/type_.rs index 66b4f9f72c23..c90ceda4d72e 100644 --- a/src/parser/type_.rs +++ b/src/parser/type_.rs @@ -1,6 +1,7 @@ use nom::character::complete::{multispace0, multispace1}; use nom::{alt, delimited, do_parse, map, named, opt, separated_list0, tag, terminated, tuple}; +use super::ident; use crate::ast::{FunctionType, Type}; named!(function_type(&str) -> Type, do_parse!( @@ -29,6 +30,7 @@ named!(pub type_(&str) -> Type, alt!( tag!("bool") => { |_| Type::Bool } | tag!("cstring") => { |_| Type::CString } | function_type | + ident => { |id| Type::Var(id) } | delimited!( tuple!(tag!("("), multispace0), type_, @@ -38,7 +40,10 @@ named!(pub type_(&str) -> Type, alt!( #[cfg(test)] mod tests { + use std::convert::TryFrom; + use super::*; + use crate::ast::Ident; #[test] fn simple_types() { @@ -103,4 +108,18 @@ mod tests { }) ) } + + #[test] + fn type_vars() { + assert_eq!( + test_parse!(type_, "fn x, y -> x"), + Type::Function(FunctionType { + args: vec![ + Type::Var(Ident::try_from("x").unwrap()), + Type::Var(Ident::try_from("y").unwrap()), + ], + ret: Box::new(Type::Var(Ident::try_from("x").unwrap())), + }) + ) + } } diff --git a/src/tc/mod.rs b/src/tc/mod.rs index 2c40a02bf7c6..4c088c885749 100644 --- a/src/tc/mod.rs +++ b/src/tc/mod.rs @@ -1,13 +1,16 @@ +use bimap::BiMap; use derive_more::From; use itertools::Itertools; +use std::cell::RefCell; use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::fmt::{self, Display}; -use std::result; +use std::{mem, result}; use thiserror::Error; -use crate::ast::{self, hir, BinaryOperator, Ident, Literal}; +use crate::ast::{self, hir, Arg, BinaryOperator, Ident, Literal}; use crate::common::env::Env; +use crate::common::{Namer, NamerOf}; #[derive(Debug, Error)] pub enum Error { @@ -52,7 +55,7 @@ pub enum PrimType { CString, } -impl From for ast::Type { +impl<'a> From for ast::Type<'a> { fn from(pr: PrimType) -> Self { match pr { PrimType::Int => ast::Type::Int, @@ -88,22 +91,7 @@ pub enum Type { }, } -impl PartialEq for Type { - fn eq(&self, other: &ast::Type) -> bool { - match (self, other) { - (Type::Univ(_), _) => todo!(), - (Type::Exist(_), _) => false, - (Type::Nullary(_), _) => todo!(), - (Type::Prim(pr), ty) => ast::Type::from(*pr) == *ty, - (Type::Fun { args, ret }, ast::Type::Function(ft)) => { - *args == ft.args && (**ret).eq(&*ft.ret) - } - (Type::Fun { .. }, _) => false, - } - } -} - -impl TryFrom for ast::Type { +impl<'a> TryFrom for ast::Type<'a> { type Error = Type; fn try_from(value: Type) -> result::Result { @@ -142,33 +130,29 @@ impl Display for Type { } } -impl From for Type { - fn from(type_: ast::Type) -> Self { - match type_ { - ast::Type::Int => INT, - ast::Type::Float => FLOAT, - ast::Type::Bool => BOOL, - ast::Type::CString => CSTRING, - ast::Type::Function(ast::FunctionType { args, ret }) => Type::Fun { - args: args.into_iter().map(Self::from).collect(), - ret: Box::new(Self::from(*ret)), - }, - } - } -} - struct Typechecker<'ast> { - ty_var_counter: u64, + ty_var_namer: NamerOf, ctx: HashMap, env: Env, Type>, + + /// AST type var -> type + instantiations: Env, Type>, + + /// AST type-var -> universal TyVar + type_vars: RefCell<(BiMap, TyVar>, NamerOf>)>, } impl<'ast> Typechecker<'ast> { fn new() -> Self { Self { - ty_var_counter: 0, + ty_var_namer: Namer::new(TyVar).boxed(), + type_vars: RefCell::new(( + Default::default(), + Namer::alphabetic().map(|n| Ident::try_from(n).unwrap()), + )), ctx: Default::default(), env: Default::default(), + instantiations: Default::default(), } } @@ -224,7 +208,8 @@ impl<'ast> Typechecker<'ast> { |ast::Binding { ident, type_, body }| -> Result> { let body = self.tc_expr(body)?; if let Some(type_) = type_ { - self.unify(body.type_(), &type_.into())?; + let type_ = self.type_from_ast_type(type_); + self.unify(body.type_(), &type_)?; } self.env.set(ident.clone(), body.type_().clone()); Ok(hir::Binding { @@ -265,19 +250,22 @@ impl<'ast> Typechecker<'ast> { self.env.push(); let args: Vec<_> = args .into_iter() - .map(|id| { - let ty = self.fresh_ex(); - self.env.set(id.clone(), ty.clone()); - (id, ty) + .map(|Arg { ident, type_ }| { + let ty = match type_ { + Some(t) => self.type_from_ast_type(t), + None => self.fresh_ex(), + }; + self.env.set(ident.clone(), ty.clone()); + (ident, ty) }) .collect(); let body = self.tc_expr(body)?; self.env.pop(); Ok(hir::Expr::Fun { - type_: Type::Fun { - args: args.iter().map(|(_, ty)| ty.clone()).collect(), - ret: Box::new(body.type_().clone()), - }, + type_: self.universalize( + args.iter().map(|(_, ty)| ty.clone()).collect(), + body.type_().clone(), + ), args, body: Box::new(body), }) @@ -290,6 +278,7 @@ impl<'ast> Typechecker<'ast> { ret: Box::new(ret_ty.clone()), }; let fun = self.tc_expr(*fun)?; + self.instantiations.push(); self.unify(&ft, fun.type_())?; let args = args .into_iter() @@ -300,6 +289,7 @@ impl<'ast> Typechecker<'ast> { Ok(arg) }) .try_collect()?; + self.commit_instantiations(); Ok(hir::Expr::Call { fun: Box::new(fun), args, @@ -308,7 +298,8 @@ impl<'ast> Typechecker<'ast> { } ast::Expr::Ascription { expr, type_ } => { let expr = self.tc_expr(*expr)?; - self.unify(expr.type_(), &type_.into())?; + let type_ = self.type_from_ast_type(type_); + self.unify(expr.type_(), &type_)?; Ok(expr) } } @@ -334,8 +325,7 @@ impl<'ast> Typechecker<'ast> { } fn fresh_tv(&mut self) -> TyVar { - self.ty_var_counter += 1; - TyVar(self.ty_var_counter) + self.ty_var_namer.make_name() } fn fresh_ex(&mut self) -> Type { @@ -343,29 +333,69 @@ impl<'ast> Typechecker<'ast> { } fn fresh_univ(&mut self) -> Type { - Type::Exist(self.fresh_tv()) - } + Type::Univ(self.fresh_tv()) + } + + #[allow(clippy::redundant_closure)] // https://github.com/rust-lang/rust-clippy/issues/6903 + fn universalize(&mut self, args: Vec, ret: Type) -> Type { + let mut vars = HashMap::new(); + let mut universalize_type = move |ty| match ty { + Type::Exist(tv) if self.resolve_tv(tv).is_none() => vars + .entry(tv) + .or_insert_with_key(|tv| { + let ty = self.fresh_univ(); + self.ctx.insert(*tv, ty.clone()); + ty + }) + .clone(), + _ => ty, + }; - fn universalize<'a>(&mut self, expr: hir::Expr<'a, Type>) -> hir::Expr<'a, Type> { - // TODO - expr + Type::Fun { + args: args.into_iter().map(|t| universalize_type(t)).collect(), + ret: Box::new(universalize_type(ret)), + } } fn unify(&mut self, ty1: &Type, ty2: &Type) -> Result { match (ty1, ty2) { - (Type::Prim(p1), Type::Prim(p2)) if p1 == p2 => Ok(ty2.clone()), (Type::Exist(tv), ty) | (ty, Type::Exist(tv)) => match self.resolve_tv(*tv) { - Some(existing_ty) if *ty == existing_ty => Ok(ty.clone()), - Some(existing_ty) => Err(Error::TypeMismatch { - expected: ty.clone(), - actual: existing_ty.into(), - }), + Some(existing_ty) if self.types_match(ty, &existing_ty) => Ok(ty.clone()), + Some(var @ ast::Type::Var(_)) => { + let var = self.type_from_ast_type(var); + self.unify(&var, ty) + } + Some(existing_ty) => match ty { + Type::Exist(_) => { + let rhs = self.type_from_ast_type(existing_ty); + self.unify(ty, &rhs) + } + _ => Err(Error::TypeMismatch { + expected: ty.clone(), + actual: self.type_from_ast_type(existing_ty), + }), + }, None => match self.ctx.insert(*tv, ty.clone()) { Some(existing) => self.unify(&existing, ty), None => Ok(ty.clone()), }, }, (Type::Univ(u1), Type::Univ(u2)) if u1 == u2 => Ok(ty2.clone()), + (Type::Univ(u), ty) | (ty, Type::Univ(u)) => { + let ident = self.name_univ(*u); + match self.instantiations.resolve(&ident) { + Some(existing_ty) if ty == existing_ty => Ok(ty.clone()), + Some(existing_ty) => Err(Error::TypeMismatch { + expected: ty.clone(), + actual: existing_ty.clone(), + }), + None => { + self.instantiations.set(ident, ty.clone()); + Ok(ty.clone()) + } + } + } + (Type::Prim(p1), Type::Prim(p2)) if p1 == p2 => Ok(ty2.clone()), ( Type::Fun { args: args1, @@ -395,18 +425,24 @@ impl<'ast> Typechecker<'ast> { } } - fn finalize_expr(&self, expr: hir::Expr<'ast, Type>) -> Result> { + fn finalize_expr( + &self, + expr: hir::Expr<'ast, Type>, + ) -> Result>> { expr.traverse_type(|ty| self.finalize_type(ty)) } - fn finalize_decl(&self, decl: hir::Decl<'ast, Type>) -> Result> { + fn finalize_decl( + &self, + decl: hir::Decl<'ast, Type>, + ) -> Result>> { decl.traverse_type(|ty| self.finalize_type(ty)) } - fn finalize_type(&self, ty: Type) -> Result { - match ty { + fn finalize_type(&self, ty: Type) -> Result> { + let ret = match ty { Type::Exist(tv) => self.resolve_tv(tv).ok_or(Error::AmbiguousType(tv)), - Type::Univ(tv) => todo!(), + Type::Univ(tv) => Ok(ast::Type::Var(self.name_univ(tv))), Type::Nullary(_) => todo!(), Type::Prim(pr) => Ok(pr.into()), Type::Fun { args, ret } => Ok(ast::Type::Function(ast::FunctionType { @@ -416,23 +452,105 @@ impl<'ast> Typechecker<'ast> { .try_collect()?, ret: Box::new(self.finalize_type(*ret)?), })), - } + }; + ret } - fn resolve_tv(&self, tv: TyVar) -> Option { + fn resolve_tv(&self, tv: TyVar) -> Option> { let mut res = &Type::Exist(tv); loop { match res { Type::Exist(tv) => { res = self.ctx.get(tv)?; } - Type::Univ(_) => todo!(), + Type::Univ(tv) => { + let ident = self.name_univ(*tv); + if let Some(r) = self.instantiations.resolve(&ident) { + res = r; + } else { + break Some(ast::Type::Var(ident)); + } + } Type::Nullary(_) => todo!(), Type::Prim(pr) => break Some((*pr).into()), Type::Fun { args, ret } => todo!(), } } } + + fn type_from_ast_type(&mut self, ast_type: ast::Type<'ast>) -> Type { + match ast_type { + ast::Type::Int => INT, + ast::Type::Float => FLOAT, + ast::Type::Bool => BOOL, + ast::Type::CString => CSTRING, + ast::Type::Function(ast::FunctionType { args, ret }) => Type::Fun { + args: args + .into_iter() + .map(|t| self.type_from_ast_type(t)) + .collect(), + ret: Box::new(self.type_from_ast_type(*ret)), + }, + ast::Type::Var(id) => Type::Univ({ + let opt_tv = { self.type_vars.borrow_mut().0.get_by_left(&id).copied() }; + opt_tv.unwrap_or_else(|| { + let tv = self.fresh_tv(); + self.type_vars + .borrow_mut() + .0 + .insert_no_overwrite(id, tv) + .unwrap(); + tv + }) + }), + } + } + + fn name_univ(&self, tv: TyVar) -> Ident<'static> { + let mut vars = self.type_vars.borrow_mut(); + vars.0 + .get_by_right(&tv) + .map(Ident::to_owned) + .unwrap_or_else(|| { + let name = vars.1.make_name(); + vars.0.insert_no_overwrite(name.clone(), tv).unwrap(); + name + }) + } + + fn commit_instantiations(&mut self) { + let mut ctx = mem::take(&mut self.ctx); + for (_, v) in ctx.iter_mut() { + if let Type::Univ(tv) = v { + if let Some(concrete) = self.instantiations.resolve(&self.name_univ(*tv)) { + *v = concrete.clone(); + } + } + } + self.ctx = ctx; + self.instantiations.pop(); + } + + fn types_match(&self, type_: &Type, ast_type: &ast::Type<'ast>) -> bool { + match (type_, ast_type) { + (Type::Univ(u), ast::Type::Var(v)) => { + Some(u) == self.type_vars.borrow().0.get_by_left(v) + } + (Type::Univ(_), _) => false, + (Type::Exist(_), _) => false, + (Type::Nullary(_), _) => todo!(), + (Type::Prim(pr), ty) => ast::Type::from(*pr) == *ty, + (Type::Fun { args, ret }, ast::Type::Function(ft)) => { + args.len() == ft.args.len() + && args + .iter() + .zip(&ft.args) + .all(|(a1, a2)| self.types_match(a1, &a2)) + && self.types_match(&*ret, &*ft.ret) + } + (Type::Fun { .. }, _) => false, + } + } } pub fn typecheck_expr(expr: ast::Expr) -> Result> { @@ -446,8 +564,10 @@ pub fn typecheck_toplevel(decls: Vec) -> Result x"); + assert_type!("fn x = x", "fn x -> x"); } #[test] @@ -517,6 +642,11 @@ mod tests { assert_type!("fn x = x + 1", "fn int -> int"); } + #[test] + fn arg_ascriptions() { + assert_type!("fn (x: int) = x", "fn int -> int"); + } + #[test] fn call_concrete_function() { assert_type!("(fn x = x + 1) 2", "int"); diff --git a/tests/compile.rs b/tests/compile.rs new file mode 100644 index 000000000000..177391423c7d --- /dev/null +++ b/tests/compile.rs @@ -0,0 +1,41 @@ +use std::process::Command; + +use crate_root::root; + +const FIXTURES: &[(&str, i32)] = &[("simple", 5), ("functions", 9)]; + +#[test] +fn compile_and_run_files() { + let ach = root().unwrap().join("ach"); + + for (fixture, exit_code) in FIXTURES { + println!(">>> Testing: {}", fixture); + + println!(" Running: `make {}`", fixture); + assert!( + Command::new("make") + .arg(fixture) + .current_dir(&ach) + .spawn() + .unwrap() + .wait() + .unwrap() + .success(), + "make failed" + ); + + let out_path = ach.join(fixture); + println!(" Running: `{}`", out_path.to_str().unwrap()); + assert_eq!( + Command::new(out_path) + .spawn() + .unwrap() + .wait() + .unwrap() + .code() + .unwrap(), + *exit_code, + ); + println!(" OK"); + } +} -- cgit 1.4.1 From b93268085aab14c80a400c299da5d04d2781098e Mon Sep 17 00:00:00 2001 From: Griffin Smith Date: Sun, 14 Mar 2021 17:01:25 -0400 Subject: Implement top-level ascription of declarations --- ach/Makefile | 2 +- src/ast/mod.rs | 1 + src/parser/mod.rs | 30 +++++++++++++++++++++++++++++- src/tc/mod.rs | 42 +++++++++++++++++++++++++++++------------- tests/compile.rs | 13 +++++++++++++ 5 files changed, 73 insertions(+), 15 deletions(-) diff --git a/ach/Makefile b/ach/Makefile index 869a0d0f8a3e..3a8cd2865e87 100644 --- a/ach/Makefile +++ b/ach/Makefile @@ -12,4 +12,4 @@ default: simple .PHONY: clean clean: - @rm -f *.ll *.o simple + @rm -f *.ll *.o simple functions diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1884ba69f43c..3a2261aeda23 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -266,6 +266,7 @@ impl<'a> Fun<'a> { #[derive(Debug, PartialEq, Eq, Clone)] pub enum Decl<'a> { Fun { name: Ident<'a>, body: Fun<'a> }, + Ascription { name: Ident<'a>, type_: Type<'a> }, } //// diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8599ccabfc23..dd7874aff853 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -98,7 +98,20 @@ named!(fun_decl(&str) -> Decl, do_parse!( }) )); +named!(ascription_decl(&str) -> Decl, do_parse!( + name: ident + >> multispace0 + >> complete!(char!(':')) + >> multispace0 + >> type_: type_ + >> (Decl::Ascription { + name, + type_ + }) +)); + named!(pub decl(&str) -> Decl, alt!( + ascription_decl | fun_decl )); @@ -108,7 +121,7 @@ named!(pub toplevel(&str) -> Vec, terminated!(many0!(decl), multispace0)); mod tests { use std::convert::TryInto; - use crate::ast::{BinaryOperator, Expr, Literal, Type}; + use crate::ast::{BinaryOperator, Expr, FunctionType, Literal, Type}; use super::*; use expr::tests::ident_expr; @@ -166,4 +179,19 @@ mod tests { ); assert_eq!(res.len(), 3); } + + #[test] + fn top_level_ascription() { + let res = test_parse!(toplevel, "id : fn a -> a"); + assert_eq!( + res, + vec![Decl::Ascription { + name: "id".try_into().unwrap(), + type_: Type::Function(FunctionType { + args: vec![Type::Var("a".try_into().unwrap())], + ret: Box::new(Type::Var("a".try_into().unwrap())) + }) + }] + ) + } } diff --git a/src/tc/mod.rs b/src/tc/mod.rs index 4c088c885749..559ac993cc9b 100644 --- a/src/tc/mod.rs +++ b/src/tc/mod.rs @@ -305,22 +305,38 @@ impl<'ast> Typechecker<'ast> { } } - pub(crate) fn tc_decl(&mut self, decl: ast::Decl<'ast>) -> Result> { + pub(crate) fn tc_decl( + &mut self, + decl: ast::Decl<'ast>, + ) -> Result>> { match decl { ast::Decl::Fun { name, body } => { - let body = self.tc_expr(ast::Expr::Fun(Box::new(body)))?; + let mut expr = ast::Expr::Fun(Box::new(body)); + if let Some(type_) = self.env.resolve(&name) { + expr = ast::Expr::Ascription { + expr: Box::new(expr), + type_: self.finalize_type(type_.clone())?, + }; + } + + let body = self.tc_expr(expr)?; let type_ = body.type_().clone(); self.env.set(name.clone(), type_); match body { - hir::Expr::Fun { args, body, type_ } => Ok(hir::Decl::Fun { + hir::Expr::Fun { args, body, type_ } => Ok(Some(hir::Decl::Fun { name, args, body, type_, - }), + })), _ => unreachable!(), } } + ast::Decl::Ascription { name, type_ } => { + let type_ = self.type_from_ast_type(type_); + self.env.set(name.clone(), type_); + Ok(None) + } } } @@ -561,15 +577,15 @@ pub fn typecheck_expr(expr: ast::Expr) -> Result> { pub fn typecheck_toplevel(decls: Vec) -> Result>> { let mut typechecker = Typechecker::new(); - decls - .into_iter() - .map(|decl| { - let hir_decl = typechecker.tc_decl(decl)?; - let res = typechecker.finalize_decl(hir_decl)?; - typechecker.ctx.clear(); - Ok(res) - }) - .try_collect() + let mut res = Vec::with_capacity(decls.len()); + for decl in decls { + if let Some(hir_decl) = typechecker.tc_decl(decl)? { + let hir_decl = typechecker.finalize_decl(hir_decl)?; + res.push(hir_decl); + } + typechecker.ctx.clear(); + } + Ok(res) } #[cfg(test)] diff --git a/tests/compile.rs b/tests/compile.rs index 177391423c7d..7fa15ad9653e 100644 --- a/tests/compile.rs +++ b/tests/compile.rs @@ -8,6 +8,19 @@ const FIXTURES: &[(&str, i32)] = &[("simple", 5), ("functions", 9)]; fn compile_and_run_files() { let ach = root().unwrap().join("ach"); + println!("Running: `make clean`"); + assert!( + Command::new("make") + .arg("clean") + .current_dir(&ach) + .spawn() + .unwrap() + .wait() + .unwrap() + .success(), + "make clean failed" + ); + for (fixture, exit_code) in FIXTURES { println!(">>> Testing: {}", fixture); -- cgit 1.4.1