about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
author Florian Klink <flokli@flokli.de> 2022-12-29T21·47+0100
committer flokli <flokli@flokli.de> 2022-12-30T13·31+0000
commit 357c4d4836180975b4699877b2344d1b2c88d690 (patch)
tree 544aaef634a702cc02f4adb5c6498855064402a9 /tvix
parent 5ba47a2bc39b95fa4ece5d52ce08bfeda130367f (diff)
feat(tvix/store): add nixbase32 mod r/5550
This implements the nix-specific base32 encoding and decoding, exposing
a subset of the API that the data-encoding crate provides.

Nix uses a custom alphabet, no padding, and encodes bytes in reverse
order. The latter one is the reason we can't just use the data-encoding
crate directly.

Three odd corner case tests ported over from go-nix failed. We opened
b/235 to further investigate.

Change-Id: I73fab6ddd67177d882e4c3f2b48761c95853d558
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7683
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix')
-rw-r--r-- tvix/Cargo.lock 30
-rw-r--r-- tvix/Cargo.nix 122
-rw-r--r-- tvix/store/Cargo.toml 8
-rw-r--r-- tvix/store/src/main.rs 1
-rw-r--r-- tvix/store/src/nixbase32.rs 119
5 files changed, 247 insertions, 33 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index f2c0e391ca..a79c6d6135 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -474,6 +474,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "data-encoding"
+version = "2.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb"
+
+[[package]]
 name = "derivation"
 version = "0.1.0"
 dependencies = [
@@ -1786,6 +1792,28 @@ dependencies = [
 ]
 
 [[package]]
+name = "test-case"
+version = "2.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "21d6cf5a7dffb3f9dceec8e6b8ca528d9bd71d36c9f074defb548ce161f598c0"
+dependencies = [
+ "test-case-macros",
+]
+
+[[package]]
+name = "test-case-macros"
+version = "2.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e45b7bf6e19353ddd832745c8fcf77a17a93171df7151187f26623f2b75b5b26"
+dependencies = [
+ "cfg-if",
+ "proc-macro-error",
+ "proc-macro2 1.0.47",
+ "quote 1.0.21",
+ "syn 1.0.103",
+]
+
+[[package]]
 name = "test-generator"
 version = "0.3.0"
 source = "git+https://github.com/JamesGuthrie/test-generator.git?rev=82e799979980962aec1aa324ec6e0e4cad781f41#82e799979980962aec1aa324ec6e0e4cad781f41"
@@ -2116,9 +2144,11 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "blake3",
+ "data-encoding",
  "lazy_static",
  "prost",
  "prost-build",
+ "test-case",
  "thiserror",
  "tonic",
  "tonic-build",
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index a72a8d3277..89185708f8 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -1438,6 +1438,20 @@ rec {
         ];
 
       };
+      "data-encoding" = rec {
+        crateName = "data-encoding";
+        version = "2.3.3";
+        edition = "2018";
+        sha256 = "1yq8jnivxsjzl3mjbjdjg5kfvd17wawbmg1jvsfw6cqmn1n6dn13";
+        authors = [
+          "Julien Cretin <git@ia0.eu>"
+        ];
+        features = {
+          "default" = [ "std" ];
+          "std" = [ "alloc" ];
+        };
+        resolvedDefaultFeatures = [ "alloc" "default" "std" ];
+      };
       "derivation" = rec {
         crateName = "derivation";
         version = "0.1.0";
@@ -1450,31 +1464,23 @@ rec {
           else ./derivation;
         dependencies = [
           {
-            name = "blake3";
-            packageId = "blake3";
-            features = [ "rayon" "std" ];
-          }
-          {
-            name = "maplit";
-            packageId = "maplit";
-          }
-          {
-            name = "prost";
-            packageId = "prost";
+            name = "glob";
+            packageId = "glob";
           }
           {
-            name = "tonic";
-            packageId = "tonic";
+            name = "serde";
+            packageId = "serde";
+            features = [ "derive" ];
           }
         ];
-        buildDependencies = [
+        devDependencies = [
           {
-            name = "prost-build";
-            packageId = "prost-build";
+            name = "serde_json";
+            packageId = "serde_json";
           }
           {
-            name = "tonic-build";
-            packageId = "tonic-build";
+            name = "test-generator";
+            packageId = "test-generator";
           }
         ];
 
@@ -2724,16 +2730,6 @@ rec {
           "value-bag" = [ "dep:value-bag" ];
         };
       };
-      "maplit" = rec {
-        crateName = "maplit";
-        version = "1.0.2";
-        edition = "2015";
-        sha256 = "07b5kjnhrrmfhgqm9wprjw8adx6i225lqp49gasgqg74lahnabiy";
-        authors = [
-          "bluss"
-        ];
-
-      };
       "matchit" = rec {
         crateName = "matchit";
         version = "0.7.0";
@@ -5091,6 +5087,64 @@ rec {
         ];
 
       };
+      "test-case" = rec {
+        crateName = "test-case";
+        version = "2.2.2";
+        edition = "2018";
+        sha256 = "1h4qymhy332lzgg79w696qfxg6wdab5birn8xvfgkczzgmdczmi1";
+        authors = [
+          "Marcin Sas-Szymanski <marcin.sas-szymanski@anixe.pl>"
+          "Wojciech Polak <frondeus@gmail.com>"
+          "Łukasz Biel <lukasz.p.biel@gmail.com>"
+        ];
+        dependencies = [
+          {
+            name = "test-case-macros";
+            packageId = "test-case-macros";
+            usesDefaultFeatures = false;
+          }
+        ];
+        features = {
+          "regex" = [ "dep:regex" ];
+          "with-regex" = [ "regex" "test-case-macros/with-regex" ];
+        };
+      };
+      "test-case-macros" = rec {
+        crateName = "test-case-macros";
+        version = "2.2.2";
+        edition = "2018";
+        sha256 = "09jvbfvz48v6ya3i25gp3lbr6ym1fz7qyp3l6bcdslwkw7v7nnz4";
+        procMacro = true;
+        authors = [
+          "Marcin Sas-Szymanski <marcin.sas-szymanski@anixe.pl>"
+          "Wojciech Polak <frondeus@gmail.com>"
+          "Łukasz Biel <lukasz.p.biel@gmail.com>"
+        ];
+        dependencies = [
+          {
+            name = "cfg-if";
+            packageId = "cfg-if";
+          }
+          {
+            name = "proc-macro-error";
+            packageId = "proc-macro-error";
+          }
+          {
+            name = "proc-macro2";
+            packageId = "proc-macro2 1.0.47";
+          }
+          {
+            name = "quote";
+            packageId = "quote 1.0.21";
+          }
+          {
+            name = "syn";
+            packageId = "syn 1.0.103";
+            features = [ "full" "extra-traits" ];
+          }
+        ];
+        features = { };
+      };
       "test-generator" = rec {
         crateName = "test-generator";
         version = "0.3.0";
@@ -6336,6 +6390,14 @@ rec {
             features = [ "rayon" "std" ];
           }
           {
+            name = "data-encoding";
+            packageId = "data-encoding";
+          }
+          {
+            name = "lazy_static";
+            packageId = "lazy_static";
+          }
+          {
             name = "prost";
             packageId = "prost";
           }
@@ -6360,8 +6422,8 @@ rec {
         ];
         devDependencies = [
           {
-            name = "lazy_static";
-            packageId = "lazy_static";
+            name = "test-case";
+            packageId = "test-case";
           }
         ];
 
diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml
index a14fff99d6..ee8c1191ee 100644
--- a/tvix/store/Cargo.toml
+++ b/tvix/store/Cargo.toml
@@ -6,13 +6,15 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0.68"
 blake3 = { version = "1.3.1", features = ["rayon", "std"] }
+data-encoding = "2.3.3"
+lazy_static = "1.4.0"
 prost = "0.11.2"
 thiserror = "1.0.38"
 tonic = "0.8.2"
 
-[dev-dependencies]
-lazy_static = "1.4.0"
-
 [build-dependencies]
 prost-build = "0.11.2"
 tonic-build = "0.8.2"
+
+[dev-dependencies]
+test-case = "2.2.2"
diff --git a/tvix/store/src/main.rs b/tvix/store/src/main.rs
index 772a45d0bd..cca96c1bbb 100644
--- a/tvix/store/src/main.rs
+++ b/tvix/store/src/main.rs
@@ -1,3 +1,4 @@
+mod nixbase32;
 mod proto;
 
 #[cfg(test)]
diff --git a/tvix/store/src/nixbase32.rs b/tvix/store/src/nixbase32.rs
new file mode 100644
index 0000000000..8be9f1b6ea
--- /dev/null
+++ b/tvix/store/src/nixbase32.rs
@@ -0,0 +1,119 @@
+//! Implements the slightly odd "base32" encoding that's used in Nix.
+//!
+//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
+//! encoding to "nix base32" doesn't use any padding, and reads in characters
+//! in reverse order.
+//!
+//! This is also the main reason why `data_encoding::Encoding` can't be used
+//! directly, but this module aims to provide a similar interface (with some
+//! methods omitted).
+use data_encoding::{DecodeError, Encoding, Specification};
+use lazy_static::lazy_static;
+
+/// Nixbase32Encoding wraps a data_encoding::Encoding internally.
+/// We can't use it directly, as nix also reads in characters in reverse order.
+pub struct Nixbase32Encoding {
+    encoding: Encoding,
+}
+
+lazy_static! {
+    /// A lazily-initialized Nixbase32Encoding providing some functions seen on a data_encoding::Encoding.
+    pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding();
+}
+
+/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding,
+/// using the nixbase32 alphabet and config.
+fn nixbase32_encoding() -> Nixbase32Encoding {
+    let mut spec = Specification::new();
+    spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz");
+
+    Nixbase32Encoding {
+        encoding: spec.encoding().unwrap(),
+    }
+}
+
+impl Nixbase32Encoding {
+    /// Returns encoded input
+    pub fn encode(&self, input: &[u8]) -> String {
+        // Reverse the input, reading in the bytes in reverse order.
+        let mut reversed = Vec::with_capacity(input.len());
+        reversed.extend(input.iter().rev());
+        self.encoding.encode(&reversed)
+    }
+
+    /// Returns decoded input
+    /// Check [data_encoding::Encoding::decode] for the error cases.
+    pub fn decode(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
+        // Decode first, then reverse the bytes of the output.
+        let output = self.encoding.decode(input)?;
+
+        let mut reversed = Vec::with_capacity(output.len());
+        reversed.extend(output.iter().rev());
+        Ok(reversed)
+    }
+
+    /// Returns the decoded length of an input of length len.
+    /// Check [data_encoding::Encoding::decode_len] for the error cases.
+    pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
+        self.encoding.decode_len(len)
+    }
+
+    /// Returns the encoded length of an input of length len
+    pub fn encode_len(&self, len: usize) -> usize {
+        self.encoding.encode_len(len)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixbase32::NIXBASE32;
+    use test_case::test_case;
+
+    #[test_case("", vec![] ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to encode to 3w?
+    // #[test_case("0z", vec![0x1f]; "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "nixpath")]
+    fn encode(enc: &str, dec: Vec<u8>) {
+        assert_eq!(enc, NIXBASE32.encode(&dec));
+    }
+
+    #[test_case("", Some(vec![]) ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to require spec.check_trailing_bits and still fails?
+    // #[test_case("0z", Some(vec![0x1f]); "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "nixpath")]
+    // this is invalid encoding, because it encodes 10 1-bits, so the carry
+    // would be 2 1-bits
+    #[test_case("zz", None; "invalid encoding-1")]
+    // this is an even more specific example - it'd decode as 00000000 11
+    // FUTUREWORK: b/235
+    // #[test_case("c0", None; "invalid encoding-2")]
+
+    fn decode(enc: &str, dec: Option<Vec<u8>>) {
+        match dec {
+            Some(dec) => {
+                // The decode needs to match what's passed in dec
+                assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap());
+            }
+            None => {
+                // the decode needs to be an error
+                assert_eq!(true, NIXBASE32.decode(enc.as_bytes()).is_err());
+            }
+        }
+    }
+
+    #[test]
+    fn encode_len() {
+        assert_eq!(NIXBASE32.encode_len(20), 32)
+    }
+
+    #[test]
+    fn decode_len() {
+        assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20)
+    }
+}