about summary refs log tree commit diff
path: root/tvix/store
diff options
context:
space:
mode:
author Florian Klink <flokli@flokli.de> 2022-12-29T21·47+0100
committer flokli <flokli@flokli.de> 2022-12-30T13·31+0000
commit 357c4d4836180975b4699877b2344d1b2c88d690 (patch)
tree 544aaef634a702cc02f4adb5c6498855064402a9 /tvix/store
parent 5ba47a2bc39b95fa4ece5d52ce08bfeda130367f (diff)
feat(tvix/store): add nixbase32 mod r/5550
This implements the nix-specific base32 encoding and decoding, exposing
a subset of the API that the data-encoding crate provides.

Nix uses a custom alphabet, no padding, and encodes bytes in reverse
order. The latter one is the reason we can't just use the data-encoding
crate directly.

Three odd corner case tests ported over from go-nix failed. We opened
b/235 to further investigate.

Change-Id: I73fab6ddd67177d882e4c3f2b48761c95853d558
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7683
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/store')
-rw-r--r-- tvix/store/Cargo.toml 8
-rw-r--r-- tvix/store/src/main.rs 1
-rw-r--r-- tvix/store/src/nixbase32.rs 119
3 files changed, 125 insertions, 3 deletions
diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml
index a14fff99d62d..ee8c1191ee5d 100644
--- a/tvix/store/Cargo.toml
+++ b/tvix/store/Cargo.toml
@@ -6,13 +6,15 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0.68"
 blake3 = { version = "1.3.1", features = ["rayon", "std"] }
+data-encoding = "2.3.3"
+lazy_static = "1.4.0"
 prost = "0.11.2"
 thiserror = "1.0.38"
 tonic = "0.8.2"
 
-[dev-dependencies]
-lazy_static = "1.4.0"
-
 [build-dependencies]
 prost-build = "0.11.2"
 tonic-build = "0.8.2"
+
+[dev-dependencies]
+test-case = "2.2.2"
diff --git a/tvix/store/src/main.rs b/tvix/store/src/main.rs
index 772a45d0bd59..cca96c1bbb57 100644
--- a/tvix/store/src/main.rs
+++ b/tvix/store/src/main.rs
@@ -1,3 +1,4 @@
+mod nixbase32;
 mod proto;
 
 #[cfg(test)]
diff --git a/tvix/store/src/nixbase32.rs b/tvix/store/src/nixbase32.rs
new file mode 100644
index 000000000000..8be9f1b6ea19
--- /dev/null
+++ b/tvix/store/src/nixbase32.rs
@@ -0,0 +1,119 @@
+//! Implements the slightly odd "base32" encoding that's used in Nix.
+//!
+//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648),
+//! encoding to "nix base32" doesn't use any padding, and reads in characters
+//! in reverse order.
+//!
+//! This is also the main reason why `data_encoding::Encoding` can't be used
+//! directly, but this module aims to provide a similar interface (with some
+//! methods omitted).
+use data_encoding::{DecodeError, Encoding, Specification};
+use lazy_static::lazy_static;
+
+/// Nixbase32Encoding wraps a data_encoding::Encoding internally.
+/// We can't use it directly, as nix also reads in characters in reverse order.
+pub struct Nixbase32Encoding {
+    encoding: Encoding,
+}
+
+lazy_static! {
+    /// A shared Nixbase32Encoding providing some functions seen on a data_encoding::Encoding.
+    pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding();
+}
+
+/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding,
+/// using the nixbase32 alphabet and config.
+fn nixbase32_encoding() -> Nixbase32Encoding {
+    let mut spec = Specification::new();
+    spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz");
+
+    Nixbase32Encoding {
+        encoding: spec.encoding().unwrap(),
+    }
+}
+
+impl Nixbase32Encoding {
+    /// Returns encoded input
+    pub fn encode(&self, input: &[u8]) -> String {
+        // Reverse the input, reading in the bytes in reverse order.
+        let mut reversed = Vec::with_capacity(input.len());
+        reversed.extend(input.iter().rev());
+        self.encoding.encode(&reversed)
+    }
+
+    /// Returns decoded input
+    /// Check [data_encoding::Encoding::decode] for the error cases.
+    pub fn decode(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
+        // Decode first, then reverse the bytes of the output.
+        let output = self.encoding.decode(input)?;
+
+        let mut reversed = Vec::with_capacity(output.len());
+        reversed.extend(output.iter().rev());
+        Ok(reversed)
+    }
+
+    /// Returns the decoded length of an input of length len.
+    /// Check [data_encoding::Encoding::decode_len] for the error cases.
+    pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
+        self.encoding.decode_len(len)
+    }
+
+    /// Returns the encoded length of an input of length len
+    pub fn encode_len(&self, len: usize) -> usize {
+        self.encoding.encode_len(len)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixbase32::NIXBASE32;
+    use test_case::test_case;
+
+    #[test_case("", vec![] ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to encode to 3w?
+    // #[test_case("0z", vec![0x1f]; "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "nixpath")]
+    fn encode(enc: &str, dec: Vec<u8>) {
+        assert_eq!(enc, NIXBASE32.encode(&dec));
+    }
+
+    #[test_case("", Some(vec![]) ; "empty bytes")]
+    // FUTUREWORK: b/235
+    // this seems to require spec.check_trailing_bits and still fails?
+    // #[test_case("0z", Some(vec![0x1f]); "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "nixpath")]
+    // this is invalid encoding, because it encodes 10 1-bits, so the carry
+    // would be 2 1-bits
+    #[test_case("zz", None; "invalid encoding-1")]
+    // this is an even more specific example - it'd decode as 00000000 11
+    // FUTUREWORK: b/235
+    // #[test_case("c0", None; "invalid encoding-2")]
+
+    fn decode(enc: &str, dec: Option<Vec<u8>>) {
+        match dec {
+            Some(dec) => {
+                // The decode needs to match what's passed in dec
+                assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap());
+            }
+            None => {
+                // the decode needs to be an error
+                assert_eq!(true, NIXBASE32.decode(enc.as_bytes()).is_err());
+            }
+        }
+    }
+
+    #[test]
+    fn encode_len() {
+        assert_eq!(NIXBASE32.encode_len(20), 32)
+    }
+
+    #[test]
+    fn decode_len() {
+        assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20)
+    }
+}