diff options
author | Adam Joseph <adam@westernsemico.com> | 2022-10-31T10·47-0700 |
---|---|---|
committer | Adam Joseph <adam@westernsemico.com> | 2022-11-04T21·28+0000 |
commit | a79c233ae62703d1e27054084f70f6b0ddc866a4 (patch) | |
tree | 3e030be16a24b8a6c278bd88b62dfcc5334f45a5 /tvix/eval/src/builtins/mod.rs | |
parent | e3a66cbd5d0ffb2306b6fe2cd65b53a1dbd8394f (diff) |
feat(tvix/eval): implement builtins.split r/5244
This implements builtins.split, and passes eval-okay-regex-split.nix (which is moved out of notyetpassing).

Signed-off-by: Adam Joseph <adam@westernsemico.com>
Change-Id: Ieb0975da2058966c697ee0e2f5b3f26ccabfae57
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7143
Tested-by: BuildkiteCI
Reviewed-by: grfn <grfn@gws.fyi>
Diffstat (limited to 'tvix/eval/src/builtins/mod.rs')
-rw-r--r-- | tvix/eval/src/builtins/mod.rs | 38 |
1 files changed, 38 insertions, 0 deletions
```diff
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 7f55c90c15ad..a6fcb8742c56 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -632,6 +632,44 @@ fn pure_builtins() -> Vec<Builtin> {
             // we just return the second and ignore the first
             Ok(args.pop().unwrap())
         }),
+        Builtin::new(
+            "split",
+            &[true, true],
+            |mut args: Vec<Value>, _: &mut VM| {
+                let s = args.pop().unwrap().to_str()?;
+                let text = s.as_str();
+                let re = args.pop().unwrap().to_str()?;
+                let re: Regex = Regex::new(re.as_str()).unwrap();
+                let mut capture_locations = re.capture_locations();
+                let num_captures = capture_locations.len();
+                let mut ret = NixList::new();
+                let mut pos = 0;
+
+                while let Some(thematch) = re.captures_read_at(&mut capture_locations, text, pos) {
+                    // push the unmatched characters preceding the match
+                    ret.push(Value::from(&text[pos..thematch.start()]));
+
+                    // Push a list with one element for each capture
+                    // group in the regex, containing the characters
+                    // matched by that capture group, or null if no match.
+                    // We skip capture 0; it represents the whole match.
+                    let v: Vec<Value> = (1..num_captures)
+                        .map(|i| capture_locations.get(i))
+                        .map(|o| {
+                            o.map(|(start, end)| Value::from(&text[start..end]))
+                                .unwrap_or(Value::Null)
+                        })
+                        .collect();
+                    ret.push(Value::List(NixList::from(v)));
+                    pos = thematch.end();
+                }
+
+                // push the unmatched characters following the last match
+                ret.push(Value::from(&text[pos..]));
+
+                Ok(Value::List(ret))
+            },
+        ),
         Builtin::new("sort", &[true, true], |args: Vec<Value>, vm: &mut VM| {
             let mut list = args[1].to_list()?;
             let comparator = &args[0];
```