about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-01-26T13·42+0100
committerflokli <flokli@flokli.de>2023-01-30T11·46+0000
commit8ec8d03175e5e658600543dd0ba895f3f40e51e6 (patch)
tree4393e5d562556acb60ef604f5787658d1dbaa47d
parent85e563c554b338dc73770ebeffc7fdd642e3f627 (diff)
fix(tvix/store/nixbase32): fix encoder/decoder r/5781
Replace our data_encoding usage with the implementation taken from
https://github.com/nix-community/go-nix/tree/master/pkg/nixbase32

Also uncomment some of the unit tests, and add a regression test for a
NIXBASE32.encode with a 32 bytes sequence.

The previous implementation of NIXBASE32.encode is wrong in that case:

```
❯ nix hash to-base32 sha256-s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA=
0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk
❯ echo -n s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA= | base64 -d | hexdump -C
00000000  b3 a2 4d e9 7a 8f db c8  35 b9 83 31 69 50 10 30  |..M.z...5..1iP.0|
00000010  b8 97 70 31 bc b5 4b 3b  3a c1 37 40 f8 46 ab 30  |..p1..K;:.7@.F.0|
00000020
```

Change-Id: I0468b62bbbab390f8d7d3812e657e5d59dceed59
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7934
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Adam Joseph <adam@westernsemico.com>
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r--tvix/derivation/src/derivation.rs6
-rw-r--r--tvix/derivation/src/errors.rs4
-rw-r--r--tvix/derivation/src/output.rs4
-rw-r--r--tvix/store/src/nixbase32.rs165
-rw-r--r--tvix/store/src/store_path.rs18
5 files changed, 121 insertions, 76 deletions
diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs
index 45fcfb6b9f..390024da33 100644
--- a/tvix/derivation/src/derivation.rs
+++ b/tvix/derivation/src/derivation.rs
@@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize};
 use sha2::{Digest, Sha256};
 use std::collections::BTreeSet;
 use std::{collections::BTreeMap, fmt, fmt::Write};
-use tvix_store::nixbase32::NIXBASE32;
+use tvix_store::nixbase32;
 use tvix_store::store_path::{StorePath, STORE_DIR};
 
 #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
@@ -64,9 +64,9 @@ fn build_store_path(
     };
     let compressed = compress_hash(&digest, 20);
     if is_derivation {
-        StorePath::from_string(format!("{}-{}.drv", NIXBASE32.encode(&compressed), name).as_str())
+        StorePath::from_string(format!("{}-{}.drv", nixbase32::encode(&compressed), name).as_str())
     } else {
-        StorePath::from_string(format!("{}-{}", NIXBASE32.encode(&compressed), name,).as_str())
+        StorePath::from_string(format!("{}-{}", nixbase32::encode(&compressed), name,).as_str())
     }
     .map_err(|_e| DerivationError::InvalidOutputName(name.to_string()))
     // Constructing the StorePath can only fail if the passed output name was
diff --git a/tvix/derivation/src/errors.rs b/tvix/derivation/src/errors.rs
index a1c49650ed..cf7e65697e 100644
--- a/tvix/derivation/src/errors.rs
+++ b/tvix/derivation/src/errors.rs
@@ -1,5 +1,5 @@
 use thiserror::Error;
-use tvix_store::store_path::ParseStorePathError;
+use tvix_store::{nixbase32::Nixbase32DecodeError, store_path::ParseStorePathError};
 
 /// Errors that can occur during the validation of Derivation structs.
 #[derive(Debug, Error, PartialEq)]
@@ -48,7 +48,7 @@ pub enum OutputError {
     #[error("Invalid ouput path {0}: {1}")]
     InvalidOutputPath(String, ParseStorePathError),
     #[error("Invalid hash encoding: {0}")]
-    InvalidHashEncoding(String, data_encoding::DecodeError),
+    InvalidHashEncoding(String, Nixbase32DecodeError),
     #[error("Invalid hash algo: {0}")]
     InvalidHashAlgo(String),
     #[error("Invalid Digest size {0} for algo {1}")]
diff --git a/tvix/derivation/src/output.rs b/tvix/derivation/src/output.rs
index 36a480d5a0..ac5a7bcb6c 100644
--- a/tvix/derivation/src/output.rs
+++ b/tvix/derivation/src/output.rs
@@ -1,5 +1,5 @@
 use serde::{Deserialize, Serialize};
-use tvix_store::{nixbase32::NIXBASE32, store_path::StorePath};
+use tvix_store::{nixbase32, store_path::StorePath};
 
 use crate::OutputError;
 
@@ -27,7 +27,7 @@ impl Output {
     pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> {
         if let Some(hash) = &self.hash {
             // try to decode digest
-            let result = NIXBASE32.decode(&hash.digest.as_bytes());
+            let result = nixbase32::decode(&hash.digest.as_bytes());
             match result {
                 Err(e) => return Err(OutputError::InvalidHashEncoding(hash.digest.clone(), e)),
                 Ok(digest) => {
diff --git a/tvix/store/src/nixbase32.rs b/tvix/store/src/nixbase32.rs
index 913e60714b..39aa4f1d54 100644
--- a/tvix/store/src/nixbase32.rs
+++ b/tvix/store/src/nixbase32.rs
@@ -4,113 +4,164 @@
 //! encoding to "nix base32" doesn't use any padding, and reads in characters
 //! in reverse order.
 //!
-//! This is also the main reason why `data_encoding::Encoding` can't be used
-//! directly, but this module aims to provide a similar interface (with some
-//! methods omitted).
-use data_encoding::{DecodeError, Encoding, Specification};
-use lazy_static::lazy_static;
-
-/// Nixbase32Encoding wraps a data_encoding::Encoding internally.
-/// We can't use it directly, as nix also reads in characters in reverse order.
-pub struct Nixbase32Encoding {
-    encoding: Encoding,
-}
+//! This is also the main reason why we can't use `data_encoding::Encoding` -
+//! it gets things wrong if there normally would be a need for padding.
+
+use std::fmt::Write;
+
+use thiserror::Error;
 
-lazy_static! {
-    /// Returns a Nixbase32Encoding providing some functions seen on a data_encoding::Encoding.
-    pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding();
+const ALPHABET: &'static [u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz";
+
+/// Errors that can occur while decoding nixbase32-encoded data.
+#[derive(Debug, Eq, PartialEq, Error)]
+pub enum Nixbase32DecodeError {
+    #[error("character {0:x} not in alphabet")]
+    CharacterNotInAlphabet(u8),
+    #[error("nonzero carry")]
+    NonzeroCarry(),
 }
 
-/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding,
-/// using the nixbase32 alphabet and config.
-fn nixbase32_encoding() -> Nixbase32Encoding {
-    let mut spec = Specification::new();
-    spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz");
+/// Returns encoded input
+pub fn encode(input: &[u8]) -> String {
+    let output_len = encode_len(input.len());
+    let mut output = String::with_capacity(output_len);
+
+    if output_len > 0 {
+        for n in (0..=output_len - 1).rev() {
+            let b = n * 5; // bit offset within the entire input
+            let i = b / 8; // input byte index
+            let j = b % 8; // bit offset within that input byte
 
-    Nixbase32Encoding {
-        encoding: spec.encoding().unwrap(),
+            let mut c = input[i] >> j;
+            if i + 1 < input.len() {
+                // we want to right shift, and discard shifted out bits (unchecked)
+                // To do this without panicing, we need to do the shifting in u16
+                // and convert back to u8 afterwards.
+                c |= ((input[i + 1] as u16) << 8 - j as u16) as u8
+            }
+
+            output
+                .write_char(ALPHABET[(c & 0x1f) as usize] as char)
+                .unwrap();
+        }
     }
+
+    output
 }
 
-impl Nixbase32Encoding {
-    /// Returns encoded input
-    pub fn encode(&self, input: &[u8]) -> String {
-        // Reverse the input, reading in the bytes in reverse order.
-        let reversed: Vec<u8> = input.iter().cloned().rev().collect();
-        self.encoding.encode(&reversed)
-    }
+/// This maps a nixbase32-encoded character to its binary representation, which
+/// is also the index of the character in the alphabet.
+fn decode_char(encoded_char: &u8) -> Option<u8> {
+    Some(match encoded_char {
+        b'0'..=b'9' => encoded_char - b'0',
+        b'a'..=b'd' => encoded_char - b'a' + 10_u8,
+        b'f'..=b'n' => encoded_char - b'f' + 14_u8,
+        b'p'..=b's' => encoded_char - b'p' + 23_u8,
+        b'v'..=b'z' => encoded_char - b'v' + 27_u8,
+        _ => return None,
+    })
+}
 
-    /// Returns decoded input
-    /// Check [data_encoding::Encoding::encode] for the error cases.
-    pub fn decode(&self, input: &[u8]) -> Result<Vec<u8>, DecodeError> {
-        // Decode first, then reverse the bytes of the output.
-        let mut output = self.encoding.decode(input)?;
-        output.reverse();
-        Ok(output)
-    }
+/// Returns decoded input
+pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
+    let output_len = decode_len(input.len());
+    let mut output: Vec<u8> = vec![0x00; output_len];
+
+    // loop over all characters in reverse, and keep the iteration count in n.
+    for (n, c) in input.iter().rev().enumerate() {
+        match decode_char(c) {
+            None => return Err(Nixbase32DecodeError::CharacterNotInAlphabet(*c)),
+            Some(c_decoded) => {
+                let b = n * 5;
+                let i = b / 8;
+                let j = b % 8;
 
-    /// Returns the decoded length of an input of length len.
-    /// Check [data_encoding::Encoding::decode_len] for the error cases.
-    pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
-        self.encoding.decode_len(len)
+                let val = (c_decoded as u16).rotate_left(j as u32);
+                output[i] |= (val & 0x00ff) as u8;
+                let carry = ((val & 0xff00) >> 8) as u8;
+
+                // if we're at the end of dst…
+                if i == output_len - 1 {
+                    // but have a nonzero carry, the encoding is invalid.
+                    if carry != 0 {
+                        return Err(Nixbase32DecodeError::NonzeroCarry());
+                    }
+                } else {
+                    output[i + 1] |= carry;
+                }
+            }
+        }
     }
 
-    /// Returns the encoded length of an input of length len
-    pub fn encode_len(&self, len: usize) -> usize {
-        self.encoding.encode_len(len)
+    Ok(output)
+}
+
+/// Returns the decoded length of an input of length len.
+pub fn decode_len(len: usize) -> usize {
+    return (len * 5) / 8;
+}
+
+/// Returns the encoded length of an input of length len
+pub fn encode_len(len: usize) -> usize {
+    if len == 0 {
+        return 0;
     }
+    return (len * 8 - 1) / 5 + 1;
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::nixbase32::NIXBASE32;
     use test_case::test_case;
 
     #[test_case("", vec![] ; "empty bytes")]
-    // FUTUREWORK: b/235
-    // this seems to encode to 3w?
-    // #[test_case("0z", vec![0x1f]; "one byte")]
+    #[test_case("0z", vec![0x1f]; "one byte")]
     #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
                  0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
                  0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "store path")]
+    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", vec![
+        0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
+        0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
+    ]; "sha256")]
     fn encode(enc: &str, dec: Vec<u8>) {
-        assert_eq!(enc, NIXBASE32.encode(&dec));
+        assert_eq!(enc, super::encode(&dec));
     }
 
     #[test_case("", Some(vec![]) ; "empty bytes")]
-    // FUTUREWORK: b/235
-    // this seems to require spec.check_trailing_bits and still fails?
-    // #[test_case("0z", Some(vec![0x1f]); "one byte")]
+    #[test_case("0z", Some(vec![0x1f]); "one byte")]
     #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
                  0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
                  0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "store path")]
+    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(vec![
+        0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
+        0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
+    ]); "sha256")]
     // this is invalid encoding, because it encodes 10 1-bytes, so the carry
     // would be 2 1-bytes
     #[test_case("zz", None; "invalid encoding-1")]
     // this is an even more specific example - it'd decode as 00000000 11
-    // FUTUREWORK: b/235
-    // #[test_case("c0", None; "invalid encoding-2")]
+    #[test_case("c0", None; "invalid encoding-2")]
 
     fn decode(enc: &str, dec: Option<Vec<u8>>) {
         match dec {
             Some(dec) => {
                 // The decode needs to match what's passed in dec
-                assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap());
+                assert_eq!(dec, super::decode(enc.as_bytes()).unwrap());
             }
             None => {
                 // the decode needs to be an error
-                assert_eq!(true, NIXBASE32.decode(enc.as_bytes()).is_err());
+                assert_eq!(true, super::decode(enc.as_bytes()).is_err());
             }
         }
     }
 
     #[test]
     fn encode_len() {
-        assert_eq!(NIXBASE32.encode_len(20), 32)
+        assert_eq!(super::encode_len(20), 32)
     }
 
     #[test]
     fn decode_len() {
-        assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20)
+        assert_eq!(super::decode_len(32), 20)
     }
 }
diff --git a/tvix/store/src/store_path.rs b/tvix/store/src/store_path.rs
index e7b600e8ae..5032a73fb1 100644
--- a/tvix/store/src/store_path.rs
+++ b/tvix/store/src/store_path.rs
@@ -1,5 +1,4 @@
-use crate::nixbase32::NIXBASE32;
-use data_encoding::DecodeError;
+use crate::nixbase32::{self, Nixbase32DecodeError};
 use std::fmt;
 use thiserror::Error;
 
@@ -19,7 +18,7 @@ pub enum ParseStorePathError {
     #[error("Dash is missing between hash and name")]
     MissingDash(),
     #[error("Hash encoding is invalid: {0}")]
-    InvalidHashEncoding(DecodeError),
+    InvalidHashEncoding(Nixbase32DecodeError),
     #[error("Invalid name: {0}")]
     InvalidName(String),
     #[error("Tried to parse an absolute path which was missing the store dir prefix.")]
@@ -53,7 +52,7 @@ impl StorePath {
             return Err(ParseStorePathError::InvalidName("".to_string()));
         }
 
-        let digest = match NIXBASE32.decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) {
+        let digest = match nixbase32::decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) {
             Ok(decoded) => decoded,
             Err(decoder_error) => {
                 return Err(ParseStorePathError::InvalidHashEncoding(decoder_error))
@@ -110,25 +109,20 @@ impl StorePath {
 
 impl fmt::Display for StorePath {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "{}-{}",
-            crate::nixbase32::NIXBASE32.encode(&self.digest),
-            self.name
-        )
+        write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::nixbase32::NIXBASE32;
+    use crate::nixbase32;
     use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE};
 
     use super::{ParseStorePathError, StorePath};
 
     #[test]
     fn encoded_digest_size() {
-        assert_eq!(ENCODED_DIGEST_SIZE, NIXBASE32.encode_len(DIGEST_SIZE));
+        assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE));
     }
 
     #[test]