From 357c4d4836180975b4699877b2344d1b2c88d690 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 29 Dec 2022 22:47:02 +0100 Subject: feat(tvix/store): add nixbase32 mod This implements the nix-specific base32 encoding and decoding, exposing a subset of the API that the data-encoding crate provides. Nix uses a custom alphabet, no padding, and encodes bytes in reverse order. The latter one is the reason we can't just use the data-encoding crate directly. Three odd corner case tests ported over from go-nix failed. We opened b/235 to further investigate. Change-Id: I73fab6ddd67177d882e4c3f2b48761c95853d558 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7683 Reviewed-by: tazjin Autosubmit: flokli Tested-by: BuildkiteCI --- tvix/Cargo.lock | 30 +++++++++++ tvix/Cargo.nix | 122 +++++++++++++++++++++++++++++++++----------- tvix/store/Cargo.toml | 8 +-- tvix/store/src/main.rs | 1 + tvix/store/src/nixbase32.rs | 119 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 247 insertions(+), 33 deletions(-) create mode 100644 tvix/store/src/nixbase32.rs (limited to 'tvix') diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index f2c0e391caf3..a79c6d613517 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -473,6 +473,12 @@ dependencies = [ "syn 1.0.103", ] +[[package]] +name = "data-encoding" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb" + [[package]] name = "derivation" version = "0.1.0" @@ -1785,6 +1791,28 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "test-case" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21d6cf5a7dffb3f9dceec8e6b8ca528d9bd71d36c9f074defb548ce161f598c0" +dependencies = [ + "test-case-macros", +] + +[[package]] +name = "test-case-macros" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e45b7bf6e19353ddd832745c8fcf77a17a93171df7151187f26623f2b75b5b26" +dependencies = [ + "cfg-if", + "proc-macro-error", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", +] + [[package]] name = "test-generator" version = "0.3.0" @@ -2116,9 +2144,11 @@ version = "0.1.0" dependencies = [ "anyhow", "blake3", + "data-encoding", "lazy_static", "prost", "prost-build", + "test-case", "thiserror", "tonic", "tonic-build", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index a72a8d3277f0..89185708f89b 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -1438,6 +1438,20 @@ rec { ]; }; + "data-encoding" = rec { + crateName = "data-encoding"; + version = "2.3.3"; + edition = "2018"; + sha256 = "1yq8jnivxsjzl3mjbjdjg5kfvd17wawbmg1jvsfw6cqmn1n6dn13"; + authors = [ + "Julien Cretin " + ]; + features = { + "default" = [ "std" ]; + "std" = [ "alloc" ]; + }; + resolvedDefaultFeatures = [ "alloc" "default" "std" ]; + }; "derivation" = rec { crateName = "derivation"; version = "0.1.0"; @@ -1450,31 +1464,23 @@ rec { else ./derivation; dependencies = [ { - name = "blake3"; - packageId = "blake3"; - features = [ "rayon" "std" ]; - } - { - name = "maplit"; - packageId = "maplit"; - } - { - name = "prost"; - packageId = "prost"; + name = "glob"; + packageId = "glob"; } { - name = "tonic"; - packageId = "tonic"; + name = "serde"; + packageId = "serde"; + features = [ "derive" ]; } ]; - buildDependencies = [ + devDependencies = [ { - name = "prost-build"; - packageId = "prost-build"; + name = "serde_json"; + packageId = "serde_json"; } { - name = "tonic-build"; - packageId = "tonic-build"; + name = "test-generator"; + packageId = "test-generator"; } ]; @@ -2724,16 +2730,6 @@ rec { "value-bag" = [ "dep:value-bag" ]; }; }; - "maplit" = rec { - crateName = "maplit"; - version = "1.0.2"; - edition = "2015"; - sha256 = "07b5kjnhrrmfhgqm9wprjw8adx6i225lqp49gasgqg74lahnabiy"; - authors = [ - "bluss" - ]; - - }; "matchit" = rec { crateName = "matchit"; version = "0.7.0"; @@ -5091,6 +5087,64 @@ 
rec { ]; }; + "test-case" = rec { + crateName = "test-case"; + version = "2.2.2"; + edition = "2018"; + sha256 = "1h4qymhy332lzgg79w696qfxg6wdab5birn8xvfgkczzgmdczmi1"; + authors = [ + "Marcin Sas-Szymanski " + "Wojciech Polak " + "Łukasz Biel " + ]; + dependencies = [ + { + name = "test-case-macros"; + packageId = "test-case-macros"; + usesDefaultFeatures = false; + } + ]; + features = { + "regex" = [ "dep:regex" ]; + "with-regex" = [ "regex" "test-case-macros/with-regex" ]; + }; + }; + "test-case-macros" = rec { + crateName = "test-case-macros"; + version = "2.2.2"; + edition = "2018"; + sha256 = "09jvbfvz48v6ya3i25gp3lbr6ym1fz7qyp3l6bcdslwkw7v7nnz4"; + procMacro = true; + authors = [ + "Marcin Sas-Szymanski " + "Wojciech Polak " + "Łukasz Biel " + ]; + dependencies = [ + { + name = "cfg-if"; + packageId = "cfg-if"; + } + { + name = "proc-macro-error"; + packageId = "proc-macro-error"; + } + { + name = "proc-macro2"; + packageId = "proc-macro2 1.0.47"; + } + { + name = "quote"; + packageId = "quote 1.0.21"; + } + { + name = "syn"; + packageId = "syn 1.0.103"; + features = [ "full" "extra-traits" ]; + } + ]; + features = { }; + }; "test-generator" = rec { crateName = "test-generator"; version = "0.3.0"; @@ -6335,6 +6389,14 @@ rec { packageId = "blake3"; features = [ "rayon" "std" ]; } + { + name = "data-encoding"; + packageId = "data-encoding"; + } + { + name = "lazy_static"; + packageId = "lazy_static"; + } { name = "prost"; packageId = "prost"; @@ -6360,8 +6422,8 @@ rec { ]; devDependencies = [ { - name = "lazy_static"; - packageId = "lazy_static"; + name = "test-case"; + packageId = "test-case"; } ]; diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index a14fff99d62d..ee8c1191ee5d 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -6,13 +6,15 @@ edition = "2021" [dependencies] anyhow = "1.0.68" blake3 = { version = "1.3.1", features = ["rayon", "std"] } +data-encoding = "2.3.3" +lazy_static = "1.4.0" prost = "0.11.2" thiserror = 
"1.0.38"
 tonic = "0.8.2"

-[dev-dependencies]
-lazy_static = "1.4.0"
-
 [build-dependencies]
 prost-build = "0.11.2"
 tonic-build = "0.8.2"
+
+[dev-dependencies]
+test-case = "2.2.2"
diff --git a/tvix/store/src/main.rs b/tvix/store/src/main.rs
index 772a45d0bd59..cca96c1bbb57 100644
--- a/tvix/store/src/main.rs
+++ b/tvix/store/src/main.rs
@@ -1,3 +1,4 @@
+mod nixbase32;
 mod proto;

 #[cfg(test)]
diff --git a/tvix/store/src/nixbase32.rs b/tvix/store/src/nixbase32.rs
new file mode 100644
index 000000000000..8be9f1b6ea19
--- /dev/null
+++ b/tvix/store/src/nixbase32.rs
@@ -0,0 +1,119 @@
+//! Implements the slightly odd "base32" encoding that's used in Nix.
+//!
+//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
+//! encoding to "nix base32" doesn't use any padding, and reads in characters
+//! in reverse order.
+//!
+//! This is also the main reason why `data_encoding::Encoding` can't be used
+//! directly, but this module aims to provide a similar interface (with some
+//! methods omitted).
+use data_encoding::{DecodeError, Encoding, Specification};
+use lazy_static::lazy_static;
+
+/// Nixbase32Encoding wraps a data_encoding::Encoding internally.
+/// We can't use it directly, as nix also reads in characters in reverse order.
+pub struct Nixbase32Encoding {
+    encoding: Encoding,
+}
+
+lazy_static! {
+    /// Lazily initialized Nixbase32Encoding exposing a subset of the data_encoding::Encoding API.
+    pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding();
+}
+
+/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding,
+/// using the nixbase32 alphabet and config.
+fn nixbase32_encoding() -> Nixbase32Encoding {
+    let mut spec = Specification::new();
+    spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz");
+
+    Nixbase32Encoding {
+        encoding: spec.encoding().expect("the static nixbase32 specification is valid"),
+    }
+}
+
+impl Nixbase32Encoding {
+    /// Returns `input` encoded as nixbase32 (bytes reversed, then base32-encoded).
+    pub fn encode(&self, input: &[u8]) -> String {
+        // Reverse the input, reading in the bytes in reverse order.
+        let mut reversed = Vec::with_capacity(input.len());
+        reversed.extend(input.iter().rev());
+        self.encoding.encode(&reversed)
+    }
+
+    /// Returns the bytes decoded from nixbase32 `input` (base32-decoded, then reversed).
+    /// Check [data_encoding::Encoding::decode] for the error cases.
+    pub fn decode(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
+        // Decode first, then reverse the bytes of the output.
+        let output = self.encoding.decode(input)?;
+
+        let mut reversed = Vec::with_capacity(output.len());
+        reversed.extend(output.iter().rev());
+        Ok(reversed)
+    }
+
+    /// Returns the decoded length of an input of length len.
+    /// Check [data_encoding::Encoding::decode_len] for the error cases.
+    pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
+        self.encoding.decode_len(len)
+    }
+
+    /// Returns the encoded length of an input of length len
+    pub fn encode_len(&self, len: usize) -> usize {
+        self.encoding.encode_len(len)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixbase32::NIXBASE32;
+    use test_case::test_case;
+
+    #[test_case("", vec![] ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to encode to 3w?
+    // #[test_case("0z", vec![0x1f]; "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
+        0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+        0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "nixpath")]
+    fn encode(enc: &str, dec: Vec<u8>) {
+        assert_eq!(enc, NIXBASE32.encode(&dec));
+    }
+
+    #[test_case("", Some(vec![]) ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to require spec.check_trailing_bits and still fails?
+    // #[test_case("0z", Some(vec![0x1f]); "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
+        0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+        0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "nixpath")]
+    // this is invalid encoding, because it encodes 10 1-bytes, so the carry
+    // would be 2 1-bytes
+    #[test_case("zz", None; "invalid encoding-1")]
+    // this is an even more specific example - it'd decode as 00000000 11
+    // FUTUREWORK: b/235
+    // #[test_case("c0", None; "invalid encoding-2")]
+
+    fn decode(enc: &str, dec: Option<Vec<u8>>) {
+        match dec {
+            Some(dec) => {
+                // The decode needs to match what's passed in dec
+                assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap());
+            }
+            None => {
+                // the decode needs to be an error
+                assert!(NIXBASE32.decode(enc.as_bytes()).is_err());
+            }
+        }
+    }
+
+    #[test]
+    fn encode_len() {
+        assert_eq!(NIXBASE32.encode_len(20), 32)
+    }
+
+    #[test]
+    fn decode_len() {
+        assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20)
+    }
+}
--
cgit 1.4.1