about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAdam Joseph <adam@westernsemico.com>2022-10-31T10·47-0700
committerAdam Joseph <adam@westernsemico.com>2022-11-04T21·28+0000
commita79c233ae62703d1e27054084f70f6b0ddc866a4 (patch)
tree3e030be16a24b8a6c278bd88b62dfcc5334f45a5
parente3a66cbd5d0ffb2306b6fe2cd65b53a1dbd8394f (diff)
feat(tvix/eval): implement builtins.split r/5244
This implements builtins.split, and passes eval-okay-regex-split.nix
(which is moved out of notyetpassing).

Signed-off-by: Adam Joseph <adam@westernsemico.com>
Change-Id: Ieb0975da2058966c697ee0e2f5b3f26ccabfae57
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7143
Tested-by: BuildkiteCI
Reviewed-by: grfn <grfn@gws.fyi>
-rw-r--r--tvix/eval/docs/builtins.md2
-rw-r--r--tvix/eval/src/builtins/mod.rs38
-rw-r--r--tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp (renamed from tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp)0
-rw-r--r--tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix (renamed from tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix)0
4 files changed, 39 insertions, 1 deletions
diff --git a/tvix/eval/docs/builtins.md b/tvix/eval/docs/builtins.md
index 89fe45318a..00af504849 100644
--- a/tvix/eval/docs/builtins.md
+++ b/tvix/eval/docs/builtins.md
@@ -102,7 +102,7 @@ The `impl` column indicates implementation status in tvix:
 | scopedImport                  | true   |       |       |         |
 | seq                           | false  |       |       |         |
 | sort                          | false  |       |       |         |
-| split                         | false  |       |       | todo    |
+| split                         | false  |       |       |         |
 | splitVersion                  | false  |       |       |         |
 | storeDir                      | false  |       |       | store   |
 | storePath                     | false  |       |       | store   |
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 7f55c90c15..a6fcb8742c 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -632,6 +632,44 @@ fn pure_builtins() -> Vec<Builtin> {
             // we just return the second and ignore the first
             Ok(args.pop().unwrap())
         }),
+        // builtins.split regex str: returns a list that alternates the text
+        // between matches with, for every match, a list holding one entry
+        // per capture group (null for groups that did not participate).
+        Builtin::new(
+            "split",
+            &[true, true],
+            |mut args: Vec<Value>, _: &mut VM| {
+                // Arguments are popped in reverse: the string first, then the regex.
+                let s = args.pop().unwrap().to_str()?;
+                let text = s.as_str();
+                let re = args.pop().unwrap().to_str()?;
+                // NOTE(review): a pattern that fails to compile panics here;
+                // this should surface as an evaluation error instead.
+                let re: Regex = Regex::new(re.as_str()).unwrap();
+                let mut capture_locations = re.capture_locations();
+                let num_captures = capture_locations.len();
+                let mut ret = NixList::new();
+                // `pos` is where the next search starts; `prev_end` is where
+                // the previous match ended. They differ only after an empty
+                // match, where the search has to move on by one character.
+                let (mut pos, mut prev_end) = (0, 0);
+
+                while let Some(thematch) = re.captures_read_at(&mut capture_locations, text, pos) {
+                    // push the unmatched characters preceding the match
+                    ret.push(Value::from(&text[prev_end..thematch.start()]));
+
+                    // Push a list with one element for each capture
+                    // group in the regex, containing the characters
+                    // matched by that capture group, or null if no match.
+                    // We skip capture 0; it represents the whole match.
+                    let v: Vec<Value> = (1..num_captures)
+                        .map(|i| {
+                            capture_locations
+                                .get(i)
+                                .map_or(Value::Null, |(start, end)| Value::from(&text[start..end]))
+                        })
+                        .collect();
+                    ret.push(Value::List(NixList::from(v)));
+
+                    let was_empty = thematch.start() == thematch.end();
+                    prev_end = thematch.end();
+                    pos = prev_end;
+                    if was_empty {
+                        // Searching again from the same offset would return
+                        // this empty match forever. Step over one whole char
+                        // (keeping `pos` on a UTF-8 boundary), or stop if the
+                        // empty match sits at the end of the input.
+                        match text[pos..].chars().next() {
+                            Some(c) => pos += c.len_utf8(),
+                            None => break,
+                        }
+                    }
+                }
+
+                // push the unmatched characters following the last match
+                ret.push(Value::from(&text[prev_end..]));
+
+                Ok(Value::List(ret))
+            },
+        ),
         Builtin::new("sort", &[true, true], |args: Vec<Value>, vm: &mut VM| {
             let mut list = args[1].to_list()?;
             let comparator = &args[0];
diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp
index 27ba77ddaf..27ba77ddaf 100644
--- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp
+++ b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp
diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix
index 0073e05778..0073e05778 100644
--- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix
+++ b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix