From 8ec8d03175e5e658600543dd0ba895f3f40e51e6 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 26 Jan 2023 14:42:16 +0100 Subject: fix(tvix/store/nixbase32): fix encoder/decoder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace our data_encoding usage with the implementation taken from https://github.com/nix-community/go-nix/tree/master/pkg/nixbase32 Also uncomment some of the unit tests, and add a regression test for a NIXBASE32.encode with a 32 bytes sequence. The previous implementation of NIXBASE32.encode is wrong in that case: ``` ❯ nix hash to-base32 sha256-s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA= 0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk ❯ echo -n s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA= | base64 -d | hexdump -C 00000000 b3 a2 4d e9 7a 8f db c8 35 b9 83 31 69 50 10 30 |..M.z...5..1iP.0| 00000010 b8 97 70 31 bc b5 4b 3b 3a c1 37 40 f8 46 ab 30 |..p1..K;:.7@.F.0| 00000020 ``` Change-Id: I0468b62bbbab390f8d7d3812e657e5d59dceed59 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7934 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: Adam Joseph Reviewed-by: tazjin --- tvix/derivation/src/derivation.rs | 6 +- tvix/derivation/src/errors.rs | 4 +- tvix/derivation/src/output.rs | 4 +- tvix/store/src/nixbase32.rs | 165 +++++++++++++++++++++++++------------- tvix/store/src/store_path.rs | 18 ++--- 5 files changed, 121 insertions(+), 76 deletions(-) diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs index 45fcfb6b9f..390024da33 100644 --- a/tvix/derivation/src/derivation.rs +++ b/tvix/derivation/src/derivation.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; use std::collections::BTreeSet; use std::{collections::BTreeMap, fmt, fmt::Write}; -use tvix_store::nixbase32::NIXBASE32; +use tvix_store::nixbase32; use tvix_store::store_path::{StorePath, STORE_DIR}; #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] @@ -64,9 +64,9 @@ fn build_store_path( }; let compressed = compress_hash(&digest, 20); if is_derivation { - StorePath::from_string(format!("{}-{}.drv", NIXBASE32.encode(&compressed), name).as_str()) + StorePath::from_string(format!("{}-{}.drv", nixbase32::encode(&compressed), name).as_str()) } else { - StorePath::from_string(format!("{}-{}", NIXBASE32.encode(&compressed), name,).as_str()) + StorePath::from_string(format!("{}-{}", nixbase32::encode(&compressed), name,).as_str()) } .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) // Constructing the StorePath can only fail if the passed output name was diff --git a/tvix/derivation/src/errors.rs b/tvix/derivation/src/errors.rs index a1c49650ed..cf7e65697e 100644 --- a/tvix/derivation/src/errors.rs +++ b/tvix/derivation/src/errors.rs @@ -1,5 +1,5 @@ use thiserror::Error; -use tvix_store::store_path::ParseStorePathError; +use tvix_store::{nixbase32::Nixbase32DecodeError, store_path::ParseStorePathError}; /// Errors that can occur during the validation of Derivation structs. #[derive(Debug, Error, PartialEq)] @@ -48,7 +48,7 @@ pub enum OutputError { #[error("Invalid ouput path {0}: {1}")] InvalidOutputPath(String, ParseStorePathError), #[error("Invalid hash encoding: {0}")] - InvalidHashEncoding(String, data_encoding::DecodeError), + InvalidHashEncoding(String, Nixbase32DecodeError), #[error("Invalid hash algo: {0}")] InvalidHashAlgo(String), #[error("Invalid Digest size {0} for algo {1}")] diff --git a/tvix/derivation/src/output.rs b/tvix/derivation/src/output.rs index 36a480d5a0..ac5a7bcb6c 100644 --- a/tvix/derivation/src/output.rs +++ b/tvix/derivation/src/output.rs @@ -1,5 +1,5 @@ use serde::{Deserialize, Serialize}; -use tvix_store::{nixbase32::NIXBASE32, store_path::StorePath}; +use tvix_store::{nixbase32, store_path::StorePath}; use crate::OutputError; @@ -27,7 +27,7 @@ impl Output { pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { if let Some(hash) = &self.hash { // try to decode digest - let result = NIXBASE32.decode(&hash.digest.as_bytes()); + let result = nixbase32::decode(&hash.digest.as_bytes()); match result { Err(e) => return Err(OutputError::InvalidHashEncoding(hash.digest.clone(), e)), Ok(digest) => { diff --git a/tvix/store/src/nixbase32.rs b/tvix/store/src/nixbase32.rs index 913e60714b..39aa4f1d54 100644 --- a/tvix/store/src/nixbase32.rs +++ b/tvix/store/src/nixbase32.rs @@ -4,113 +4,164 @@ //! encoding to "nix base32" doesn't use any padding, and reads in characters //! in reverse order. //! -//! This is also the main reason why `data_encoding::Encoding` can't be used -//! directly, but this module aims to provide a similar interface (with some -//! methods omitted). -use data_encoding::{DecodeError, Encoding, Specification}; -use lazy_static::lazy_static; - -/// Nixbase32Encoding wraps a data_encoding::Encoding internally. -/// We can't use it directly, as nix also reads in characters in reverse order. -pub struct Nixbase32Encoding { - encoding: Encoding, -} +//! This is also the main reason why we can't use `data_encoding::Encoding` - +//! it gets things wrong if there normally would be a need for padding. + +use std::fmt::Write; + +use thiserror::Error; -lazy_static! { - /// Returns a Nixbase32Encoding providing some functions seen on a data_encoding::Encoding. - pub static ref NIXBASE32: Nixbase32Encoding = nixbase32_encoding(); +const ALPHABET: &'static [u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz"; + +/// Errors that can occur while decoding nixbase32-encoded data. +#[derive(Debug, Eq, PartialEq, Error)] +pub enum Nixbase32DecodeError { + #[error("character {0:x} not in alphabet")] + CharacterNotInAlphabet(u8), + #[error("nonzero carry")] + NonzeroCarry(), } -/// Populates the Nixbase32Encoding struct with a data_encoding::Encoding, -/// using the nixbase32 alphabet and config. -fn nixbase32_encoding() -> Nixbase32Encoding { - let mut spec = Specification::new(); - spec.symbols.push_str("0123456789abcdfghijklmnpqrsvwxyz"); +/// Returns encoded input +pub fn encode(input: &[u8]) -> String { + let output_len = encode_len(input.len()); + let mut output = String::with_capacity(output_len); + + if output_len > 0 { + for n in (0..=output_len - 1).rev() { + let b = n * 5; // bit offset within the entire input + let i = b / 8; // input byte index + let j = b % 8; // bit offset within that input byte - Nixbase32Encoding { - encoding: spec.encoding().unwrap(), + let mut c = input[i] >> j; + if i + 1 < input.len() { + // we want to right shift, and discard shifted out bits (unchecked) + // To do this without panicing, we need to do the shifting in u16 + // and convert back to u8 afterwards. + c |= ((input[i + 1] as u16) << 8 - j as u16) as u8 + } + + output + .write_char(ALPHABET[(c & 0x1f) as usize] as char) + .unwrap(); + } } + + output } -impl Nixbase32Encoding { - /// Returns encoded input - pub fn encode(&self, input: &[u8]) -> String { - // Reverse the input, reading in the bytes in reverse order. - let reversed: Vec = input.iter().cloned().rev().collect(); - self.encoding.encode(&reversed) - } +/// This maps a nixbase32-encoded character to its binary representation, which +/// is also the index of the character in the alphabet. +fn decode_char(encoded_char: &u8) -> Option { + Some(match encoded_char { + b'0'..=b'9' => encoded_char - b'0', + b'a'..=b'd' => encoded_char - b'a' + 10_u8, + b'f'..=b'n' => encoded_char - b'f' + 14_u8, + b'p'..=b's' => encoded_char - b'p' + 23_u8, + b'v'..=b'z' => encoded_char - b'v' + 27_u8, + _ => return None, + }) +} - /// Returns decoded input - /// Check [data_encoding::Encoding::encode] for the error cases. - pub fn decode(&self, input: &[u8]) -> Result, DecodeError> { - // Decode first, then reverse the bytes of the output. - let mut output = self.encoding.decode(input)?; - output.reverse(); - Ok(output) - } +/// Returns decoded input +pub fn decode(input: &[u8]) -> Result, Nixbase32DecodeError> { + let output_len = decode_len(input.len()); + let mut output: Vec = vec![0x00; output_len]; + + // loop over all characters in reverse, and keep the iteration count in n. + for (n, c) in input.iter().rev().enumerate() { + match decode_char(c) { + None => return Err(Nixbase32DecodeError::CharacterNotInAlphabet(*c)), + Some(c_decoded) => { + let b = n * 5; + let i = b / 8; + let j = b % 8; - /// Returns the decoded length of an input of length len. - /// Check [data_encoding::Encoding::decode_len] for the error cases. - pub fn decode_len(&self, len: usize) -> Result { - self.encoding.decode_len(len) + let val = (c_decoded as u16).rotate_left(j as u32); + output[i] |= (val & 0x00ff) as u8; + let carry = ((val & 0xff00) >> 8) as u8; + + // if we're at the end of dst… + if i == output_len - 1 { + // but have a nonzero carry, the encoding is invalid. + if carry != 0 { + return Err(Nixbase32DecodeError::NonzeroCarry()); + } + } else { + output[i + 1] |= carry; + } + } + } } - /// Returns the encoded length of an input of length len - pub fn encode_len(&self, len: usize) -> usize { - self.encoding.encode_len(len) + Ok(output) +} + +/// Returns the decoded length of an input of length len. +pub fn decode_len(len: usize) -> usize { + return (len * 5) / 8; +} + +/// Returns the encoded length of an input of length len +pub fn encode_len(len: usize) -> usize { + if len == 0 { + return 0; } + return (len * 8 - 1) / 5 + 1; } #[cfg(test)] mod tests { - use crate::nixbase32::NIXBASE32; use test_case::test_case; #[test_case("", vec![] ; "empty bytes")] - // FUTUREWORK: b/235 - // this seems to encode to 3w? - // #[test_case("0z", vec![0x1f]; "one byte")] + #[test_case("0z", vec![0x1f]; "one byte")] #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![ 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "store path")] + #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", vec![ + 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30, + 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30, + ]; "sha256")] fn encode(enc: &str, dec: Vec) { - assert_eq!(enc, NIXBASE32.encode(&dec)); + assert_eq!(enc, super::encode(&dec)); } #[test_case("", Some(vec![]) ; "empty bytes")] - // FUTUREWORK: b/235 - // this seems to require spec.check_trailing_bits and still fails? - // #[test_case("0z", Some(vec![0x1f]); "one byte")] + #[test_case("0z", Some(vec![0x1f]); "one byte")] #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![ 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "store path")] + #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(vec![ + 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30, + 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30, + ]); "sha256")] // this is invalid encoding, because it encodes 10 1-bytes, so the carry // would be 2 1-bytes #[test_case("zz", None; "invalid encoding-1")] // this is an even more specific example - it'd decode as 00000000 11 - // FUTUREWORK: b/235 - // #[test_case("c0", None; "invalid encoding-2")] + #[test_case("c0", None; "invalid encoding-2")] fn decode(enc: &str, dec: Option>) { match dec { Some(dec) => { // The decode needs to match what's passed in dec - assert_eq!(dec, NIXBASE32.decode(enc.as_bytes()).unwrap()); + assert_eq!(dec, super::decode(enc.as_bytes()).unwrap()); } None => { // the decode needs to be an error - assert_eq!(true, NIXBASE32.decode(enc.as_bytes()).is_err()); + assert_eq!(true, super::decode(enc.as_bytes()).is_err()); } } } #[test] fn encode_len() { - assert_eq!(NIXBASE32.encode_len(20), 32) + assert_eq!(super::encode_len(20), 32) } #[test] fn decode_len() { - assert_eq!(NIXBASE32.decode_len(32).unwrap(), 20) + assert_eq!(super::decode_len(32), 20) } } diff --git a/tvix/store/src/store_path.rs b/tvix/store/src/store_path.rs index e7b600e8ae..5032a73fb1 100644 --- a/tvix/store/src/store_path.rs +++ b/tvix/store/src/store_path.rs @@ -1,5 +1,4 @@ -use crate::nixbase32::NIXBASE32; -use data_encoding::DecodeError; +use crate::nixbase32::{self, Nixbase32DecodeError}; use std::fmt; use thiserror::Error; @@ -19,7 +18,7 @@ pub enum ParseStorePathError { #[error("Dash is missing between hash and name")] MissingDash(), #[error("Hash encoding is invalid: {0}")] - InvalidHashEncoding(DecodeError), + InvalidHashEncoding(Nixbase32DecodeError), #[error("Invalid name: {0}")] InvalidName(String), #[error("Tried to parse an absolute path which was missing the store dir prefix.")] @@ -53,7 +52,7 @@ impl StorePath { return Err(ParseStorePathError::InvalidName("".to_string())); } - let digest = match NIXBASE32.decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) { + let digest = match nixbase32::decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) { Ok(decoded) => decoded, Err(decoder_error) => { return Err(ParseStorePathError::InvalidHashEncoding(decoder_error)) @@ -110,25 +109,20 @@ impl StorePath { impl fmt::Display for StorePath { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{}-{}", - crate::nixbase32::NIXBASE32.encode(&self.digest), - self.name - ) + write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name) } } #[cfg(test)] mod tests { - use crate::nixbase32::NIXBASE32; + use crate::nixbase32; use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE}; use super::{ParseStorePathError, StorePath}; #[test] fn encoded_digest_size() { - assert_eq!(ENCODED_DIGEST_SIZE, NIXBASE32.encode_len(DIGEST_SIZE)); + assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE)); } #[test] -- cgit 1.4.1