From a79c233ae62703d1e27054084f70f6b0ddc866a4 Mon Sep 17 00:00:00 2001 From: Adam Joseph Date: Mon, 31 Oct 2022 03:47:50 -0700 Subject: feat(tvix/eval): implement builtins.split This implements builtins.split, and passes eval-okay-regex-split.nix (which is moved out of notyetpassing). Signed-off-by: Adam Joseph Change-Id: Ieb0975da2058966c697ee0e2f5b3f26ccabfae57 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7143 Tested-by: BuildkiteCI Reviewed-by: grfn --- tvix/eval/docs/builtins.md | 2 +- tvix/eval/src/builtins/mod.rs | 38 +++++++++++++++++ .../src/tests/nix_tests/eval-okay-regex-split.exp | 1 + .../src/tests/nix_tests/eval-okay-regex-split.nix | 48 ++++++++++++++++++++++ .../notyetpassing/eval-okay-regex-split.exp | 1 - .../notyetpassing/eval-okay-regex-split.nix | 48 ---------------------- 6 files changed, 88 insertions(+), 50 deletions(-) create mode 100644 tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp create mode 100644 tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix delete mode 100644 tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp delete mode 100644 tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix (limited to 'tvix') diff --git a/tvix/eval/docs/builtins.md b/tvix/eval/docs/builtins.md index 89fe45318a95..00af50484903 100644 --- a/tvix/eval/docs/builtins.md +++ b/tvix/eval/docs/builtins.md @@ -102,7 +102,7 @@ The `impl` column indicates implementation status in tvix: | scopedImport | true | | | | | seq | false | | | | | sort | false | | | | -| split | false | | | todo | +| split | false | | | | | splitVersion | false | | | | | storeDir | false | | | store | | storePath | false | | | store | diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 7f55c90c15ad..a6fcb8742c56 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -632,6 +632,44 @@ fn pure_builtins() -> Vec { // we just return the second and ignore the first Ok(args.pop().unwrap()) }), + Builtin::new( + "split", + &[true, true], + |mut args: Vec, _: &mut VM| { + let s = args.pop().unwrap().to_str()?; + let text = s.as_str(); + let re = args.pop().unwrap().to_str()?; + let re: Regex = Regex::new(re.as_str()).unwrap(); + let mut capture_locations = re.capture_locations(); + let num_captures = capture_locations.len(); + let mut ret = NixList::new(); + let mut pos = 0; + + while let Some(thematch) = re.captures_read_at(&mut capture_locations, text, pos) { + // push the unmatched characters preceding the match + ret.push(Value::from(&text[pos..thematch.start()])); + + // Push a list with one element for each capture + // group in the regex, containing the characters + // matched by that capture group, or null if no match. + // We skip capture 0; it represents the whole match. + let v: Vec = (1..num_captures) + .map(|i| capture_locations.get(i)) + .map(|o| { + o.map(|(start, end)| Value::from(&text[start..end])) + .unwrap_or(Value::Null) + }) + .collect(); + ret.push(Value::List(NixList::from(v))); + pos = thematch.end(); + } + + // push the unmatched characters following the last match + ret.push(Value::from(&text[pos..])); + + Ok(Value::List(ret)) + }, + ), Builtin::new("sort", &[true, true], |args: Vec, vm: &mut VM| { let mut list = args[1].to_list()?; let comparator = &args[0]; diff --git a/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp new file mode 100644 index 000000000000..27ba77ddaf61 --- /dev/null +++ b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.exp @@ -0,0 +1 @@ +true diff --git a/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix new file mode 100644 index 000000000000..0073e057787d --- /dev/null +++ b/tvix/eval/src/tests/nix_tests/eval-okay-regex-split.nix @@ -0,0 +1,48 @@ +with builtins; + +# Non capturing regex returns empty lists +assert split "foobar" "foobar" == ["" [] ""]; +assert split "fo*" "f" == ["" [] ""]; +assert split "fo+" "f" == ["f"]; +assert split "fo*" "fo" == ["" [] ""]; +assert split "fo*" "foo" == ["" [] ""]; +assert split "fo+" "foo" == ["" [] ""]; +assert split "fo{1,2}" "foo" == ["" [] ""]; +assert split "fo{1,2}" "fooo" == ["" [] "o"]; +assert split "fo*" "foobar" == ["" [] "bar"]; + +# Capturing regex returns a list of sub-matches +assert split "(fo*)" "f" == ["" ["f"] ""]; +assert split "(fo+)" "f" == ["f"]; +assert split "(fo*)" "fo" == ["" ["fo"] ""]; +assert split "(f)(o*)" "f" == ["" ["f" ""] ""]; +assert split "(f)(o*)" "foo" == ["" ["f" "oo"] ""]; +assert split "(fo+)" "foo" == ["" ["foo"] ""]; +assert split "(fo{1,2})" "foo" == ["" ["foo"] ""]; +assert split "(fo{1,2})" "fooo" == ["" ["foo"] "o"]; +assert split "(fo*)" "foobar" == ["" ["foo"] "bar"]; + +# Matches are greedy. +assert split "(o+)" "oooofoooo" == ["" ["oooo"] "f" ["oooo"] ""]; + +# Matches multiple times. +assert split "(b)" "foobarbaz" == ["foo" ["b"] "ar" ["b"] "az"]; + +# Split large strings containing newlines. null are inserted when a +# pattern within the current did not match anything. +assert split "[[:space:]]+|([',.!?])" '' + Nix Rocks! + That's why I use it. +'' == [ + "Nix" [ null ] "Rocks" ["!"] "" [ null ] + "That" ["'"] "s" [ null ] "why" [ null ] "I" [ null ] "use" [ null ] "it" ["."] "" [ null ] + "" +]; + +# Documentation examples +assert split "(a)b" "abc" == [ "" [ "a" ] "c" ]; +assert split "([ac])" "abc" == [ "" [ "a" ] "b" [ "c" ] "" ]; +assert split "(a)|(c)" "abc" == [ "" [ "a" null ] "b" [ null "c" ] "" ]; +assert split "([[:upper:]]+)" " FOO " == [ " " [ "FOO" ] " " ]; + +true diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp b/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp deleted file mode 100644 index 27ba77ddaf61..000000000000 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.exp +++ /dev/null @@ -1 +0,0 @@ -true diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix b/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix deleted file mode 100644 index 0073e057787d..000000000000 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-regex-split.nix +++ /dev/null @@ -1,48 +0,0 @@ -with builtins; - -# Non capturing regex returns empty lists -assert split "foobar" "foobar" == ["" [] ""]; -assert split "fo*" "f" == ["" [] ""]; -assert split "fo+" "f" == ["f"]; -assert split "fo*" "fo" == ["" [] ""]; -assert split "fo*" "foo" == ["" [] ""]; -assert split "fo+" "foo" == ["" [] ""]; -assert split "fo{1,2}" "foo" == ["" [] ""]; -assert split "fo{1,2}" "fooo" == ["" [] "o"]; -assert split "fo*" "foobar" == ["" [] "bar"]; - -# Capturing regex returns a list of sub-matches -assert split "(fo*)" "f" == ["" ["f"] ""]; -assert split "(fo+)" "f" == ["f"]; -assert split "(fo*)" "fo" == ["" ["fo"] ""]; -assert split "(f)(o*)" "f" == ["" ["f" ""] ""]; -assert split "(f)(o*)" "foo" == ["" ["f" "oo"] ""]; -assert split "(fo+)" "foo" == ["" ["foo"] ""]; -assert split "(fo{1,2})" "foo" == ["" ["foo"] ""]; -assert split "(fo{1,2})" "fooo" == ["" ["foo"] "o"]; -assert split "(fo*)" "foobar" == ["" ["foo"] "bar"]; - -# Matches are greedy. -assert split "(o+)" "oooofoooo" == ["" ["oooo"] "f" ["oooo"] ""]; - -# Matches multiple times. -assert split "(b)" "foobarbaz" == ["foo" ["b"] "ar" ["b"] "az"]; - -# Split large strings containing newlines. null are inserted when a -# pattern within the current did not match anything. -assert split "[[:space:]]+|([',.!?])" '' - Nix Rocks! - That's why I use it. -'' == [ - "Nix" [ null ] "Rocks" ["!"] "" [ null ] - "That" ["'"] "s" [ null ] "why" [ null ] "I" [ null ] "use" [ null ] "it" ["."] "" [ null ] - "" -]; - -# Documentation examples -assert split "(a)b" "abc" == [ "" [ "a" ] "c" ]; -assert split "([ac])" "abc" == [ "" [ "a" ] "b" [ "c" ] "" ]; -assert split "(a)|(c)" "abc" == [ "" [ "a" null ] "b" [ null "c" ] "" ]; -assert split "([[:upper:]]+)" " FOO " == [ " " [ "FOO" ] " " ]; - -true -- cgit 1.4.1