diff options
author | Griffin Smith <root@gws.fyi> | 2022-10-12T22·45-0400 |
---|---|---|
committer | grfn <grfn@gws.fyi> | 2022-10-15T20·35+0000 |
commit | 03a3189a3d8e3e9d198ccf73bc0a836aaa6b6e3d (patch) | |
tree | 84068017f612e150ed8a5334e687e7872e253902 /tvix | |
parent | 5eb89be68246f1e5a8cd28e48d5cec75921ca97a (diff) |
feat(tvix/eval): Initial impl of builtins.match r/5136
Implement an *initial* version of builtins.match, using the Rust `regex` crate for regular expressions. The Rust regex crate definitely has different semantics from Nix's regular expressions, but we'd like to see how far we can get before the incompatibility starts to matter. This deliberately leaves out any caching (memoization) of compiled regular expressions, which upstream Nix has, for the sake of expediency; in the future we should implement that so we don't have to compile the same regular expression multiple times. Change-Id: I5b718635831ec83397940e417a9047c4342b6fa1 Reviewed-on: https://cl.tvl.fyi/c/depot/+/6989 Tested-by: BuildkiteCI Reviewed-by: Adam Joseph <adam@westernsemico.com> Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'tvix')
-rw-r--r-- | tvix/eval/Cargo.lock | 12 | ||||
-rw-r--r-- | tvix/eval/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/eval/src/builtins/mod.rs | 20 | ||||
-rw-r--r-- | tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp | 1 | ||||
-rw-r--r-- | tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix | 29 |
5 files changed, 63 insertions, 0 deletions
diff --git a/tvix/eval/Cargo.lock b/tvix/eval/Cargo.lock index b9fa3d3a11da..5e9aaa08679b 100644 --- a/tvix/eval/Cargo.lock +++ b/tvix/eval/Cargo.lock @@ -3,6 +3,15 @@ version = 3 [[package]] +name = "aho-corasick" +version = "0.7.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +dependencies = [ + "memchr", +] + +[[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -859,6 +868,8 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" dependencies = [ + "aho-corasick", + "memchr", "regex-syntax", ] @@ -1208,6 +1219,7 @@ dependencies = [ "path-clean", "pretty_assertions", "proptest", + "regex", "rnix", "rowan", "rustyline", diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 67ed42609deb..3c6213c21520 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -25,6 +25,7 @@ proptest = { version = "1.0.0", default_features = false, features = ["std", "al test-strategy = { version = "0.2.1", optional = true } clap = { version = "3.2.22", optional = true, features = ["derive", "env"] } serde_json = "1.0.86" +regex = "1.6.0" # rnix has not been released in a while (as of 2022-09-23), we will # use it from git. 
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index c0dce868c6fc..0082d36d5735 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -7,6 +7,8 @@ use std::cmp; use std::collections::{BTreeMap, HashMap, HashSet}; use std::path::PathBuf; +use regex::Regex; + use crate::{ errors::ErrorKind, value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value}, @@ -382,6 +384,24 @@ fn pure_builtins() -> Vec<Builtin> { .map_err(Into::into) }), Builtin::new( + "match", + &[true, true], + |mut args: Vec<Value>, _: &mut VM| { + let s = args.pop().unwrap().to_str()?; + let re = args.pop().unwrap().to_str()?; + let re: Regex = Regex::new(&format!("^{}$", re.as_str())).unwrap(); + match re.captures(&s) { + Some(caps) => Ok(caps + .iter() + .skip(1) + .map(|grp| grp.map(|g| Value::from(g.as_str())).unwrap_or(Value::Null)) + .collect::<Vec<Value>>() + .into()), + None => Ok(Value::Null), + } + }, + ), + Builtin::new( "mul", &[false, false], |args: Vec<Value>, vm: &mut VM| arithmetic_op!(&*args[0].force(vm)?, &*args[1].force(vm)?, *), diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp new file mode 100644 index 000000000000..950103539123 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp @@ -0,0 +1 @@ +[ true true false true true true true false false true false [ "foobar" ] [ "FOO" ] [ "/path/to/" "/path/to" "foobar" "nix" ] [ null null "foobar" "cc" ] ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix new file mode 100644 index 000000000000..f774e00a215a --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix @@ -0,0 +1,29 @@ +with builtins; + +let + + matches = pat: s: match pat s != null; + + splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)"; + +in + +[ + (matches "foobar" "foobar") + (matches "fo*" "f") + (matches "fo+" 
"f") + (matches "fo*" "fo") + (matches "fo*" "foo") + (matches "fo+" "foo") + (matches "fo{1,2}" "foo") + (matches "fo{1,2}" "fooo") + (matches "fo*" "foobar") + (matches "[[:space:]]+([^[:space:]]+)[[:space:]]+" " foo ") + (matches "[[:space:]]+([[:upper:]]+)[[:space:]]+" " foo ") + + (match "(.*)\\.nix" "foobar.nix") + (match "[[:space:]]+([[:upper:]]+)[[:space:]]+" " FOO ") + + (splitFN "/path/to/foobar.nix") + (splitFN "foobar.cc") +] |