about summary refs log tree commit diff
diff options
context:
space:
mode:
author Griffin Smith <root@gws.fyi> 2022-10-12T22·45-0400
committer grfn <grfn@gws.fyi> 2022-10-15T20·35+0000
commit 03a3189a3d8e3e9d198ccf73bc0a836aaa6b6e3d (patch)
tree 84068017f612e150ed8a5334e687e7872e253902
parent 5eb89be68246f1e5a8cd28e48d5cec75921ca97a (diff)
feat(tvix/eval): Initial impl of builtins.match r/5136
Implement an *initial* version of builtins.match, using the Rust `regex`
crate for regular expressions. The Rust regex crate definitely has
different semantics than Nix's regular expressions - but we'd like to
see how far we can get before the incompatibility starts to matter.

This consciously leaves out any sort of memoization of compiled regular
expressions (which upstream Nix also has) for the sake of expediency -
in the future we should implement that so we don't have to compile the
same regular expression multiple times.

Change-Id: I5b718635831ec83397940e417a9047c4342b6fa1
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6989
Tested-by: BuildkiteCI
Reviewed-by: Adam Joseph <adam@westernsemico.com>
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r-- corp/tvixbolt/Cargo.lock 33
-rw-r--r-- tvix/eval/Cargo.lock 12
-rw-r--r-- tvix/eval/Cargo.toml 1
-rw-r--r-- tvix/eval/src/builtins/mod.rs 20
-rw-r--r-- tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp 1
-rw-r--r-- tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix 29
6 files changed, 96 insertions, 0 deletions
diff --git a/corp/tvixbolt/Cargo.lock b/corp/tvixbolt/Cargo.lock
index c402a18d4d..a50ca39bf2 100644
--- a/corp/tvixbolt/Cargo.lock
+++ b/corp/tvixbolt/Cargo.lock
@@ -3,6 +3,15 @@
 version = 3
 
 [[package]]
+name = "aho-corasick"
+version = "0.7.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
 name = "atty"
 version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -311,6 +320,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
 name = "memoffset"
 version = "0.6.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -400,6 +415,23 @@ dependencies = [
 ]
 
 [[package]]
+name = "regex"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
+
+[[package]]
 name = "rnix"
 version = "0.11.0-dev"
 source = "git+https://github.com/nix-community/rnix-parser.git?rev=85a045afd33e073a3eab4c0ea2f515b6bed557ab#85a045afd33e073a3eab4c0ea2f515b6bed557ab"
@@ -580,6 +612,7 @@ dependencies = [
  "codemap-diagnostic",
  "dirs",
  "path-clean",
+ "regex",
  "rnix",
  "rowan",
  "serde_json",
diff --git a/tvix/eval/Cargo.lock b/tvix/eval/Cargo.lock
index b9fa3d3a11..5e9aaa0867 100644
--- a/tvix/eval/Cargo.lock
+++ b/tvix/eval/Cargo.lock
@@ -3,6 +3,15 @@
 version = 3
 
 [[package]]
+name = "aho-corasick"
+version = "0.7.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
 name = "atty"
 version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -859,6 +868,8 @@ version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
 dependencies = [
+ "aho-corasick",
+ "memchr",
  "regex-syntax",
 ]
 
@@ -1208,6 +1219,7 @@ dependencies = [
  "path-clean",
  "pretty_assertions",
  "proptest",
+ "regex",
  "rnix",
  "rowan",
  "rustyline",
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 67ed42609d..3c6213c215 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -25,6 +25,7 @@ proptest = { version = "1.0.0", default_features = false, features = ["std", "al
 test-strategy = { version = "0.2.1", optional = true }
 clap = { version = "3.2.22", optional = true, features = ["derive", "env"] }
 serde_json = "1.0.86"
+regex = "1.6.0"
 
 # rnix has not been released in a while (as of 2022-09-23), we will
 # use it from git.
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index c0dce868c6..0082d36d57 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -7,6 +7,8 @@ use std::cmp;
 use std::collections::{BTreeMap, HashMap, HashSet};
 use std::path::PathBuf;
 
+use regex::Regex;
+
 use crate::{
     errors::ErrorKind,
     value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value},
@@ -382,6 +384,24 @@ fn pure_builtins() -> Vec<Builtin> {
                 .map_err(Into::into)
         }),
         Builtin::new(
+            "match",
+            &[true, true],
+            |mut args: Vec<Value>, _: &mut VM| {
+                let s = args.pop().unwrap().to_str()?;
+                let re = args.pop().unwrap().to_str()?;
+                let re: Regex = Regex::new(&format!("^{}$", re.as_str())).unwrap();
+                match re.captures(&s) {
+                    Some(caps) => Ok(caps
+                        .iter()
+                        .skip(1)
+                        .map(|grp| grp.map(|g| Value::from(g.as_str())).unwrap_or(Value::Null))
+                        .collect::<Vec<Value>>()
+                        .into()),
+                    None => Ok(Value::Null),
+                }
+            },
+        ),
+        Builtin::new(
             "mul",
             &[false, false],
             |args: Vec<Value>, vm: &mut VM| arithmetic_op!(&*args[0].force(vm)?, &*args[1].force(vm)?, *),
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp
new file mode 100644
index 0000000000..9501035391
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.exp
@@ -0,0 +1 @@
+[ true true false true true true true false false true false [ "foobar" ] [ "FOO" ] [ "/path/to/" "/path/to" "foobar" "nix" ] [ null null "foobar" "cc" ] ]
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix
new file mode 100644
index 0000000000..f774e00a21
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-regex-match.nix
@@ -0,0 +1,29 @@
+with builtins;
+
+let
+
+  matches = pat: s: match pat s != null;
+
+  splitFN = match "((.*)/)?([^/]*)\\.(nix|cc)";
+
+in
+
+[
+  (matches "foobar" "foobar")
+  (matches "fo*" "f")
+  (matches "fo+" "f")
+  (matches "fo*" "fo")
+  (matches "fo*" "foo")
+  (matches "fo+" "foo")
+  (matches "fo{1,2}" "foo")
+  (matches "fo{1,2}" "fooo")
+  (matches "fo*" "foobar")
+  (matches "[[:space:]]+([^[:space:]]+)[[:space:]]+" "  foo   ")
+  (matches "[[:space:]]+([[:upper:]]+)[[:space:]]+" "  foo   ")
+
+  (match "(.*)\\.nix" "foobar.nix")
+  (match "[[:space:]]+([[:upper:]]+)[[:space:]]+" "  FOO   ")
+
+  (splitFN "/path/to/foobar.nix")
+  (splitFN "foobar.cc")
+]