diff options
author | Florian Klink <flokli@flokli.de> | 2023-07-29T19·14+0200 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2023-07-31T21·41+0000 |
commit | 79531c3dab1c24ff3171c0aa067004c8e6c92e3f (patch) | |
tree | 6e4198e648810bb835a8b0e0f68bf1af779829a8 /tvix | |
parent | 9521df708f92a237090b1b17ec969b319c4d00fe (diff) |
refactor(tvix/nix-compat): support non-unicode Derivations r/6449
Derivations can have non-unicode strings in their env values, so the ATerm representations are not necessarily String anymore, but Vec<u8>. Change-Id: Ic23839471eb7f68d9c3c30667c878830946b6607 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8990 Tested-by: BuildkiteCI Reviewed-by: raitobezarius <tvl@lahfa.xyz> Autosubmit: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix')
-rw-r--r-- | tvix/Cargo.lock | 18 | ||||
-rw-r--r-- | tvix/Cargo.nix | 82 | ||||
-rw-r--r-- | tvix/cli/src/derivation.rs | 6 | ||||
-rw-r--r-- | tvix/cli/src/refscan.rs | 16 | ||||
-rw-r--r-- | tvix/nix-compat/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/escape.rs | 31 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/mod.rs | 31 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/string_escape.rs | 17 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json | 21 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json | 21 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/tests/mod.rs | 159 | ||||
-rw-r--r-- | tvix/nix-compat/src/derivation/write.rs | 145 |
14 files changed, 426 insertions, 124 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 52cc6d4a7535..cb65d4343fcb 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -267,6 +267,17 @@ dependencies = [ ] [[package]] +name = "bstr" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] name = "bumpalo" version = "3.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1346,6 +1357,7 @@ name = "nix-compat" version = "0.1.0" dependencies = [ "anyhow", + "bstr", "data-encoding", "glob", "serde", @@ -1890,6 +1902,12 @@ dependencies = [ ] [[package]] +name = "regex-automata" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294" + +[[package]] name = "regex-syntax" version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 83c3825c3160..b44e10de920e 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -829,6 +829,43 @@ rec { ]; }; + "bstr" = rec { + crateName = "bstr"; + version = "1.6.0"; + edition = "2021"; + sha256 = "01bvsr3x9n75klbwxym0zf939vzim0plsmy786p0zzzvrj6i9637"; + authors = [ + "Andrew Gallant <jamslam@gmail.com>" + ]; + dependencies = [ + { + name = "memchr"; + packageId = "memchr"; + usesDefaultFeatures = false; + } + { + name = "regex-automata"; + packageId = "regex-automata"; + optional = true; + usesDefaultFeatures = false; + features = [ "dfa-search" ]; + } + { + name = "serde"; + packageId = "serde"; + optional = true; + usesDefaultFeatures = false; + } + ]; + features = { + "alloc" = [ "serde?/alloc" ]; + "default" = [ "std" "unicode" ]; + "serde" = [ "dep:serde" ]; + "std" = [ "alloc" "memchr/std" "serde?/std" ]; + "unicode" = [ "dep:regex-automata" ]; + }; + resolvedDefaultFeatures = [ "alloc" "default" "serde" "std" "unicode" ]; + }; "bumpalo" = rec { crateName = "bumpalo"; version = "3.12.1"; @@ -3856,6 +3893,11 @@ rec { packageId = "anyhow"; } { + name = "bstr"; + packageId = "bstr"; + features = [ "alloc" "unicode" "serde" ]; + } + { name = "data-encoding"; packageId = "data-encoding"; } @@ -5337,6 +5379,46 @@ rec { }; resolvedDefaultFeatures = [ "aho-corasick" "default" "memchr" "perf" "perf-cache" "perf-dfa" "perf-inline" "perf-literal" "std" "unicode" "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" ]; }; + "regex-automata" = rec { + crateName = "regex-automata"; + version = "0.3.4"; + edition = "2021"; + sha256 = "156jmvsbzd9arih42ninzkfgv7g93g6i2fdxc5gki53m1ccxddmp"; + authors = [ + "The Rust Project Developers" + "Andrew Gallant <jamslam@gmail.com>" + ]; + features = { + "default" = [ "std" "syntax" "perf" "unicode" "meta" "nfa" "dfa" "hybrid" ]; + "dfa" = [ "dfa-build" "dfa-search" "dfa-onepass" ]; + "dfa-build" = [ "nfa-thompson" "dfa-search" ]; + "dfa-onepass" = [ "nfa-thompson" ]; + "hybrid" = [ "alloc" "nfa-thompson" ]; + "internal-instrument" = [ "internal-instrument-pikevm" ]; + "internal-instrument-pikevm" = [ "logging" "std" ]; + "logging" = [ "dep:log" "aho-corasick?/logging" ]; + "meta" = [ "syntax" "nfa-pikevm" ]; + "nfa" = [ "nfa-thompson" "nfa-pikevm" "nfa-backtrack" ]; + "nfa-backtrack" = [ "nfa-thompson" ]; + "nfa-pikevm" = [ "nfa-thompson" ]; + "nfa-thompson" = [ "alloc" ]; + "perf" = [ "perf-inline" "perf-literal" ]; + "perf-literal" = [ "perf-literal-substring" "perf-literal-multisubstring" ]; + "perf-literal-multisubstring" = [ "std" "dep:aho-corasick" ]; + "perf-literal-substring" = [ "aho-corasick?/perf-literal" "dep:memchr" ]; + "std" = [ "regex-syntax?/std" "memchr?/std" "aho-corasick?/std" "alloc" ]; + "syntax" = [ "dep:regex-syntax" "alloc" ]; + "unicode" = [ "unicode-age" "unicode-bool" "unicode-case" "unicode-gencat" "unicode-perl" "unicode-script" "unicode-segment" "unicode-word-boundary" "regex-syntax?/unicode" ]; + "unicode-age" = [ "regex-syntax?/unicode-age" ]; + "unicode-bool" = [ "regex-syntax?/unicode-bool" ]; + "unicode-case" = [ "regex-syntax?/unicode-case" ]; + "unicode-gencat" = [ "regex-syntax?/unicode-gencat" ]; + "unicode-perl" = [ "regex-syntax?/unicode-perl" ]; + "unicode-script" = [ "regex-syntax?/unicode-script" ]; + "unicode-segment" = [ "regex-syntax?/unicode-segment" ]; + }; + resolvedDefaultFeatures = [ "dfa-search" ]; + }; "regex-syntax 0.6.29" = rec { crateName = "regex-syntax"; version = "0.6.29"; diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index fa246cc74fe9..f8afe1946110 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -283,7 +283,7 @@ mod derivation_builtins { // Most of these are also added to the builder's environment in "raw" form. if drv .environment - .insert(name.as_str().to_string(), val_str) + .insert(name.as_str().to_string(), val_str.into()) .is_some() { return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into()); @@ -312,7 +312,7 @@ mod derivation_builtins { } else { let mut refscan = state.reference_scanner(); drv.arguments.iter().for_each(|s| refscan.scan_str(s)); - drv.environment.values().for_each(|s| refscan.scan_str(s)); + drv.environment.values().for_each(|s| refscan.scan_bytes(s)); refscan.scan_str(&drv.builder); refscan.finalise() } @@ -324,7 +324,7 @@ mod derivation_builtins { for output in drv.outputs.keys() { if drv .environment - .insert(output.to_string(), String::new()) + .insert(output.to_string(), String::new().into()) .is_some() { emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await; diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs index b391781f9299..25ada4c86560 100644 --- a/tvix/cli/src/refscan.rs +++ b/tvix/cli/src/refscan.rs @@ -37,7 +37,21 @@ impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> { } } - /// Scan the given string for all non-overlapping matches and collect them + /// If the given &[u8] is also a valid UTF-8 string, scan for all non- + /// overlapping matches and collect them in the scanner. + /// TODO: ideally, wu-manber would just work with &[u8] directly. + pub fn scan_bytes(&mut self, haystack: &[u8]) { + if haystack.len() < STORE_PATH_LEN { + return; + } + + match std::str::from_utf8(haystack) { + Ok(s) => self.scan_str(s), + Err(_) => {} + } + } + + /// Scan the given str for all non-overlapping matches and collect them /// in the scanner. pub fn scan_str(&mut self, haystack: &str) { if haystack.len() < STORE_PATH_LEN { diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml index 49ddbf4728e9..47bbeb251742 100644 --- a/tvix/nix-compat/Cargo.toml +++ b/tvix/nix-compat/Cargo.toml @@ -7,6 +7,7 @@ edition = "2021" [dependencies] anyhow = "1.0.68" +bstr = { version = "1.6.0", features = ["alloc", "unicode", "serde"] } data-encoding = "2.3.3" glob = "0.3.0" serde = { version = "1.0", features = ["derive"] } diff --git a/tvix/nix-compat/src/derivation/escape.rs b/tvix/nix-compat/src/derivation/escape.rs new file mode 100644 index 000000000000..03106c44209e --- /dev/null +++ b/tvix/nix-compat/src/derivation/escape.rs @@ -0,0 +1,31 @@ +use bstr::{BString, ByteSlice}; + +pub fn escape_bstr(s: &[u8]) -> BString { + let mut s: Vec<u8> = s.to_owned(); + + s = s.replace(b"\\", b"\\\\"); + s = s.replace(b"\n", b"\\n"); + s = s.replace(b"\r", b"\\r"); + s = s.replace(b"\t", b"\\t"); + s = s.replace(b"\"", b"\\\""); + + let mut out: Vec<u8> = Vec::new(); + out.push(b'\"'); + out.append(&mut s); + out.push(b'\"'); + + out.into() +} + +#[cfg(test)] +mod tests { + use super::escape_bstr; + use test_case::test_case; + + #[test_case(b"", b"\"\""; "empty")] + #[test_case(b"\"", b"\"\\\"\""; "doublequote")] + #[test_case(b":", b"\":\""; "colon")] + fn escape(input: &[u8], expected: &[u8]) { + assert_eq!(expected, escape_bstr(input)) + } +} diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index ab615d502d99..498aa1b59d4c 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -1,13 +1,14 @@ use crate::store_path::{ self, build_output_path, build_regular_ca_path, build_text_path, StorePath, }; +use bstr::BString; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::collections::{BTreeMap, BTreeSet}; mod errors; +mod escape; mod output; -mod string_escape; mod validate; mod write; @@ -27,7 +28,7 @@ pub struct Derivation { pub builder: String, #[serde(rename = "env")] - pub environment: BTreeMap<String, String>, + pub environment: BTreeMap<String, BString>, #[serde(rename = "inputDrvs")] pub input_derivations: BTreeMap<String, BTreeSet<String>>, @@ -41,12 +42,12 @@ pub struct Derivation { } impl Derivation { - /// write the Derivation to the given [std::fmt::Write], in ATerm format. + /// write the Derivation to the given [std::io::Write], in ATerm format. /// /// The only errors returns are these when writing to the passed writer. - pub fn serialize(&self, writer: &mut impl std::fmt::Write) -> Result<(), std::fmt::Error> { - writer.write_str(write::DERIVATION_PREFIX)?; - writer.write_char(write::PAREN_OPEN)?; + pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), std::io::Error> { + write::write_str(writer, write::DERIVATION_PREFIX)?; + write::write_char(writer, write::PAREN_OPEN)?; write::write_outputs(writer, &self.outputs)?; write::write_input_derivations(writer, &self.input_derivations)?; @@ -56,14 +57,14 @@ impl Derivation { write::write_arguments(writer, &self.arguments)?; write::write_enviroment(writer, &self.environment)?; - writer.write_char(write::PAREN_CLOSE)?; + write::write_char(writer, write::PAREN_CLOSE)?; Ok(()) } - /// return the ATerm serialization as a string. - pub fn to_aterm_string(&self) -> String { - let mut buffer = String::new(); + /// return the ATerm serialization. + pub fn to_aterm_bytes(&self) -> Vec<u8> { + let mut buffer: Vec<u8> = Vec::new(); // invoke serialize and write to the buffer. // Note we only propagate errors writing to the writer in serialize, @@ -93,7 +94,7 @@ impl Derivation { inputs }; - build_text_path(name, self.to_aterm_string(), references) + build_text_path(name, self.to_aterm_bytes(), references) .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) } @@ -165,7 +166,7 @@ impl Derivation { // write the ATerm of that to the hash function let mut hasher = Sha256::new(); - hasher.update(replaced_derivation.to_aterm_string()); + hasher.update(replaced_derivation.to_aterm_bytes()); hasher.finalize().to_vec() }); @@ -218,8 +219,10 @@ impl Derivation { }; output.path = abs_store_path.to_absolute_path(); - self.environment - .insert(output_name.to_string(), abs_store_path.to_absolute_path()); + self.environment.insert( + output_name.to_string(), + abs_store_path.to_absolute_path().into(), + ); } Ok(()) diff --git a/tvix/nix-compat/src/derivation/string_escape.rs b/tvix/nix-compat/src/derivation/string_escape.rs deleted file mode 100644 index 0e1dbe516f73..000000000000 --- a/tvix/nix-compat/src/derivation/string_escape.rs +++ /dev/null @@ -1,17 +0,0 @@ -const STRING_ESCAPER: [(char, &str); 5] = [ - ('\\', "\\\\"), - ('\n', "\\n"), - ('\r', "\\r"), - ('\t', "\\t"), - ('\"', "\\\""), -]; - -pub fn escape_string(s: &str) -> String { - let mut s_replaced = s.to_string(); - - for escape_sequence in STRING_ESCAPER { - s_replaced = s_replaced.replace(escape_sequence.0, escape_sequence.1); - } - - format!("\"{}\"", s_replaced) -} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv new file mode 100644 index 000000000000..6a7a35c58c3f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252","","")],[],[],":",":",[],[("builder",":"),("chars","ÅÄÖ"),("name","cp1252"),("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json new file mode 100644 index 000000000000..9d6ba8b7977f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv": { + "outputs": { + "out": { + "path": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "ÅÄÖ", + "name": "cp1252", + "out": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv new file mode 100644 index 000000000000..b19fd8eb2ce4 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1","","")],[],[],":",":",[],[("builder",":"),("chars","ÅÄÖ"),("name","latin1"),("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json new file mode 100644 index 000000000000..ffd5c08da830 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv": { + "outputs": { + "out": { + "path": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "ÅÄÖ", + "name": "latin1", + "out": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index f4070b2496f4..8fbaa52e232d 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -2,7 +2,8 @@ use crate::derivation::output::Output; use crate::derivation::Derivation; use crate::nixhash::NixHash; use crate::store_path::StorePath; -use std::collections::BTreeSet; +use bstr::{BStr, BString}; +use std::collections::{BTreeMap, BTreeSet}; use std::fs::File; use std::io::Read; use std::path::Path; @@ -12,33 +13,43 @@ use test_generator::test_resources; const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; -fn read_file(path: &str) -> String { +fn read_file(path: &str) -> BString { let path = Path::new(path); let mut file = File::open(path).unwrap(); - let mut data = String::new(); + let mut data = Vec::new(); - file.read_to_string(&mut data).unwrap(); + file.read_to_end(&mut data).unwrap(); - data + data.into() } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] fn check_serizaliation(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); - let mut serialized_derivation = String::new(); + let mut serialized_derivation = Vec::new(); derivation.serialize(&mut serialized_derivation).unwrap(); let expected = read_file(path_to_drv_file); - assert_eq!(expected, serialized_derivation); + assert_eq!(expected, BStr::new(&serialized_derivation)); } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] fn validate(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); derivation .validate(true) @@ -46,13 +57,18 @@ fn validate(path_to_drv_file: &str) { } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] -fn check_to_aterm_string(path_to_drv_file: &str) { +fn check_to_aterm_bytes(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); let expected = read_file(path_to_drv_file); - assert_eq!(expected, derivation.to_aterm_string()); + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); } #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] @@ -64,7 +80,8 @@ fn check_to_aterm_string(path_to_drv_file: &str) { #[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] fn derivation_path(name: &str, expected_path: &str) { let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, expected_path)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); assert_eq!( derivation.calculate_derivation_path(name).unwrap(), @@ -79,7 +96,7 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { let mut trimmed_outputs = derivation.outputs.clone(); for (output_name, output) in &derivation.outputs { - trimmed_env.insert(output_name.clone(), "".to_string()); + trimmed_env.insert(output_name.clone(), "".into()); assert!(trimmed_outputs.contains_key(output_name)); trimmed_outputs.insert( output_name.to_string(), @@ -103,7 +120,7 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { // read in the fixture let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); - let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let drv: Derivation = serde_json::from_slice(&data).expect("must deserialize"); let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); @@ -120,7 +137,7 @@ fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { fn output_paths(name: &str, drv_path: &str) { // read in the fixture let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); - let expected_derivation: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let expected_derivation: Derivation = serde_json::from_slice(&data).expect("must deserialize"); let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); @@ -148,7 +165,7 @@ fn output_paths(name: &str, drv_path: &str) { .to_string_lossy() )); - let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let drv: Derivation = serde_json::from_slice(&data).expect("must deserialize"); // calculate derivation_or_fod_hash for each parent. // This may not trigger subsequent requests, as both parents are FOD. @@ -204,16 +221,16 @@ fn output_path_construction() { // assemble bar env let bar_env = &mut bar_drv.environment; - bar_env.insert("builder".to_string(), ":".to_string()); - bar_env.insert("name".to_string(), "bar".to_string()); - bar_env.insert("out".to_string(), "".to_string()); // will be calculated + bar_env.insert("builder".to_string(), ":".into()); + bar_env.insert("name".to_string(), "bar".into()); + bar_env.insert("out".to_string(), "".into()); // will be calculated bar_env.insert( "outputHash".to_string(), - "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".to_string(), + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(), ); - bar_env.insert("outputHashAlgo".to_string(), "sha256".to_string()); - bar_env.insert("outputHashMode".to_string(), "recursive".to_string()); - bar_env.insert("system".to_string(), ":".to_string()); + bar_env.insert("outputHashAlgo".to_string(), "sha256".into()); + bar_env.insert("outputHashMode".to_string(), "recursive".into()); + bar_env.insert("system".to_string(), ":".into()); // assemble bar outputs bar_drv.outputs.insert( @@ -244,7 +261,7 @@ fn output_path_construction() { "{}/{}.json", RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" )); - let bar_drv_expected: Derivation = serde_json::from_str(&bar_data).expect("must deserialize"); + let bar_drv_expected: Derivation = serde_json::from_slice(&bar_data).expect("must deserialize"); assert_eq!(bar_drv_expected, bar_drv); // now construct foo, which requires bar_drv @@ -266,11 +283,11 @@ fn output_path_construction() { // assemble foo env let foo_env = &mut foo_drv.environment; - foo_env.insert("bar".to_string(), bar_output_path.to_string()); - foo_env.insert("builder".to_string(), ":".to_string()); - foo_env.insert("name".to_string(), "foo".to_string()); - foo_env.insert("out".to_string(), "".to_string()); // will be calculated - foo_env.insert("system".to_string(), ":".to_string()); + foo_env.insert("bar".to_string(), bar_output_path.to_owned().into()); + foo_env.insert("builder".to_string(), ":".into()); + foo_env.insert("name".to_string(), "foo".into()); + foo_env.insert("out".to_string(), "".into()); // will be calculated + foo_env.insert("system".to_string(), ":".into()); // asssemble foo outputs foo_drv.outputs.insert( @@ -304,7 +321,7 @@ fn output_path_construction() { "{}/{}.json", RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", )); - let foo_drv_expected: Derivation = serde_json::from_str(&foo_data).expect("must deserialize"); + let foo_drv_expected: Derivation = serde_json::from_slice(&foo_data).expect("must deserialize"); assert_eq!(foo_drv_expected, foo_drv); assert_eq!( @@ -314,3 +331,83 @@ fn output_path_construction() { .expect("must succeed") ); } + +/// This constructs a Derivation using cp1252 encoding and ensures the +/// calculated derivation path matches the one Nix does calculate, as +/// well as the ATerm serialization. +/// We can't add this as a test_case to `output_paths`, as the JSON parser +/// refuses to parse our JSONs. +/// It looks like more recent versions of Nix also seem to not produce these +/// JSON files anymore, however, it still happily produces the .drv files in +/// the store. +#[test_case( + "cp1252", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv"; + "cp1252" +)] +#[test_case( + "latin1", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv"; + "latin1" +)] +fn non_unicode(name: &str, chars: Vec<u8>, exp_output_path: &str, exp_derivation_path: &str) { + // construct the Derivation + let mut outputs: BTreeMap<String, Output> = BTreeMap::new(); + outputs.insert( + "out".to_string(), + Output { + path: exp_output_path.to_string(), + ..Default::default() + }, + ); + + let mut environment: BTreeMap<String, BString> = BTreeMap::new(); + environment.insert("builder".to_string(), ":".into()); + environment.insert("chars".to_string(), chars.into()); + environment.insert("name".to_string(), name.into()); + environment.insert("out".to_string(), exp_output_path.into()); + environment.insert("system".to_string(), ":".into()); + let derivation: Derivation = Derivation { + builder: ":".to_string(), + environment, + outputs, + system: ":".to_string(), + ..Default::default() + }; + + // check the derivation_path matches what Nix calculated. + let actual_drv_path = derivation.calculate_derivation_path(name).unwrap(); + assert_eq!(exp_derivation_path.to_string(), actual_drv_path.to_string()); + + // Now wipe the output path info, and ensure we calculate the same output + // path. + { + let mut derivation = derivation_with_trimmed_output_paths(&derivation); + let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|_| { + panic!("No parents expected"); + }); + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + assert_eq!( + exp_output_path.to_string(), + derivation.outputs.get("out").unwrap().path, + "expected calculated output path to match" + ); + } + + // Construct the ATerm representation and compare with our fixture. + { + let data = read_file(&format!("{}/{}", RESOURCES_PATHS, exp_derivation_path)); + assert_eq!( + data, + BStr::new(&derivation.to_aterm_bytes()), + "expected ATerm serialization to match", + ); + } +} diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index 52166294e078..cf62f850224f 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -3,10 +3,12 @@ //! //! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html +use crate::derivation::escape::escape_bstr; use crate::derivation::output::Output; -use crate::derivation::string_escape::escape_string; +use bstr::BString; use std::collections::BTreeSet; -use std::{collections::BTreeMap, fmt, fmt::Write}; +use std::io::Cursor; +use std::{collections::BTreeMap, io, io::Error, io::Write}; pub const DERIVATION_PREFIX: &str = "Derive"; pub const PAREN_OPEN: char = '('; @@ -16,32 +18,46 @@ pub const BRACKET_CLOSE: char = ']'; pub const COMMA: char = ','; pub const QUOTE: char = '"'; +// Writes a character to the writer. +pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { + let mut buf = [0; 4]; + let b = c.encode_utf8(&mut buf).as_bytes(); + io::copy(&mut Cursor::new(b), writer)?; + Ok(()) +} + +// Writes a string to the writer (as unicode) +pub(crate) fn write_str(writer: &mut impl Write, s: &str) -> io::Result<()> { + io::copy(&mut Cursor::new(s.as_bytes()), writer)?; + Ok(()) +} + fn write_array_elements( writer: &mut impl Write, quote: bool, open: &str, closing: &str, - elements: Vec<&str>, -) -> Result<(), fmt::Error> { - writer.write_str(open)?; + elements: &[BString], +) -> Result<(), io::Error> { + write_str(writer, open)?; for (index, element) in elements.iter().enumerate() { if index > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } if quote { - writer.write_char(QUOTE)?; + write_char(writer, QUOTE)?; } - writer.write_str(element)?; + io::copy(&mut Cursor::new(element), writer)?; if quote { - writer.write_char(QUOTE)?; + write_char(writer, QUOTE)?; } } - writer.write_str(closing)?; + write_str(writer, closing)?; Ok(()) } @@ -49,41 +65,44 @@ fn write_array_elements( pub fn write_outputs( writer: &mut impl Write, outputs: &BTreeMap<String, Output>, -) -> Result<(), fmt::Error> { - writer.write_char(BRACKET_OPEN)?; +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; for (ii, (output_name, output)) in outputs.iter().enumerate() { if ii > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } - let mut elements: Vec<&str> = vec![output_name, &output.path]; + let mut elements: Vec<BString> = vec![ + output_name.as_bytes().to_vec().into(), + output.path.as_bytes().to_vec().into(), + ]; let (e2, e3) = match &output.hash_with_mode { Some(hash) => match hash { crate::nixhash::NixHashWithMode::Flat(h) => ( - h.algo.to_string(), - data_encoding::HEXLOWER.encode(&h.digest), + h.algo.to_string().as_bytes().to_vec(), + data_encoding::HEXLOWER.encode(&h.digest).as_bytes().into(), ), crate::nixhash::NixHashWithMode::Recursive(h) => ( - format!("r:{}", h.algo), - data_encoding::HEXLOWER.encode(&h.digest), + format!("r:{}", h.algo).as_bytes().to_vec(), + data_encoding::HEXLOWER.encode(&h.digest).as_bytes().into(), ), }, - None => ("".to_string(), "".to_string()), + None => (vec![], vec![]), }; - elements.push(&e2); - elements.push(&e3); + elements.push(e2.into()); + elements.push(e3.into()); write_array_elements( writer, true, &PAREN_OPEN.to_string(), &PAREN_CLOSE.to_string(), - elements, + &elements, )? } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } @@ -91,33 +110,37 @@ pub fn write_outputs( pub fn write_input_derivations( writer: &mut impl Write, input_derivations: &BTreeMap<String, BTreeSet<String>>, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_char(BRACKET_OPEN)?; +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; + write_char(writer, BRACKET_OPEN)?; - for (ii, (input_derivation_path, input_derivation)) in input_derivations.iter().enumerate() { + for (ii, (input_derivation_path, input_derivation)) in input_derivations.into_iter().enumerate() + { if ii > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } - writer.write_char(PAREN_OPEN)?; - writer.write_char(QUOTE)?; - writer.write_str(input_derivation_path.as_str())?; - writer.write_char(QUOTE)?; - writer.write_char(COMMA)?; + write_char(writer, PAREN_OPEN)?; + write_char(writer, QUOTE)?; + write_str(writer, input_derivation_path.as_str())?; + write_char(writer, QUOTE)?; + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - input_derivation.iter().map(|s| &**s).collect(), + &input_derivation + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), )?; - writer.write_char(PAREN_CLOSE)?; + write_char(writer, PAREN_CLOSE)?; } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } @@ -125,39 +148,45 @@ pub fn write_input_derivations( pub fn write_input_sources( writer: &mut impl Write, input_sources: &BTreeSet<String>, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - input_sources.iter().map(|s| &**s).collect(), + &input_sources + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), )?; Ok(()) } -pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_str(escape_string(platform).as_str())?; +pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> { + write_char(writer, COMMA)?; + io::copy(&mut Cursor::new(escape_bstr(platform.as_bytes())), writer)?; Ok(()) } -pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_str(escape_string(builder).as_str())?; +pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> { + write_char(writer, COMMA)?; + io::copy(&mut Cursor::new(escape_bstr(builder.as_bytes())), writer)?; Ok(()) } -pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; +pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), io::Error> { + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - arguments.iter().map(|s| &**s).collect(), + &arguments + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), )?; Ok(()) @@ -165,14 +194,14 @@ pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result< pub fn write_enviroment( writer: &mut impl Write, - environment: &BTreeMap<String, String>, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_char(BRACKET_OPEN)?; - - for (ii, (key, environment)) in environment.iter().enumerate() { - if ii > 0 { - writer.write_char(COMMA)?; + environment: &BTreeMap<String, BString>, +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; + write_char(writer, BRACKET_OPEN)?; + + for (i, (k, v)) in environment.into_iter().enumerate() { + if i > 0 { + write_char(writer, COMMA)?; } write_array_elements( @@ -180,11 +209,11 @@ pub fn write_enviroment( false, &PAREN_OPEN.to_string(), &PAREN_CLOSE.to_string(), - vec![&escape_string(key), &escape_string(environment)], + &[escape_bstr(k.as_bytes()), escape_bstr(v)], )?; } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } |