From c27bacd905a14207edc56850fd4ef9383706b5c4 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 31 Jan 2023 12:18:03 +0100 Subject: refactor(tvix): introduce nix-compat crate Move nixbase32 and store_path into this. This allows //tvix/cli to not pull in //tvix/store for now. Change-Id: Id3a32867205d95794bc0d33b21d4cb3d9bafd02a Reviewed-on: https://cl.tvl.fyi/c/depot/+/7964 Tested-by: BuildkiteCI Reviewed-by: tazjin --- tvix/nix-compat/src/lib.rs | 2 + tvix/nix-compat/src/nixbase32.rs | 167 ++++++++++++++++++++++++++++++++ tvix/nix-compat/src/store_path.rs | 198 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 367 insertions(+) create mode 100644 tvix/nix-compat/src/lib.rs create mode 100644 tvix/nix-compat/src/nixbase32.rs create mode 100644 tvix/nix-compat/src/store_path.rs (limited to 'tvix/nix-compat/src') diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs new file mode 100644 index 000000000000..6ca48e9ef035 --- /dev/null +++ b/tvix/nix-compat/src/lib.rs @@ -0,0 +1,2 @@ +pub mod nixbase32; +pub mod store_path; diff --git a/tvix/nix-compat/src/nixbase32.rs b/tvix/nix-compat/src/nixbase32.rs new file mode 100644 index 000000000000..39aa4f1d5461 --- /dev/null +++ b/tvix/nix-compat/src/nixbase32.rs @@ -0,0 +1,167 @@ +//! Implements the slightly odd "base32" encoding that's used in Nix. +//! +//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648), +//! encoding to "nix base32" doesn't use any padding, and reads in characters +//! in reverse order. +//! +//! This is also the main reason why we can't use `data_encoding::Encoding` - +//! it gets things wrong if there normally would be a need for padding. + +use std::fmt::Write; + +use thiserror::Error; + +const ALPHABET: &'static [u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz"; + +/// Errors that can occur while decoding nixbase32-encoded data. 
+#[derive(Debug, Eq, PartialEq, Error)]
+pub enum Nixbase32DecodeError {
+    /// The input contains a byte that is not part of the nixbase32 alphabet.
+    #[error("character {0:x} not in alphabet")]
+    CharacterNotInAlphabet(u8),
+    /// Bits of the input that do not fit into the decoded output are set.
+    #[error("nonzero carry")]
+    NonzeroCarry(),
+}
+
+/// Encodes `input` as nixbase32 and returns the resulting string.
+///
+/// The output has exactly [`encode_len`]`(input.len())` characters; an empty
+/// input yields an empty string.
+pub fn encode(input: &[u8]) -> String {
+    let output_len = encode_len(input.len());
+    let mut output = String::with_capacity(output_len);
+
+    // The character covering the highest bits is emitted first.
+    for n in (0..output_len).rev() {
+        let b = n * 5; // bit offset within the entire input
+        let i = b / 8; // input byte index
+        let j = b % 8; // bit offset within that input byte
+
+        let mut c = input[i] >> j;
+        if let Some(&next) = input.get(i + 1) {
+            // The 5-bit group may straddle a byte boundary. To shift without
+            // panicking (j == 0 means a shift by 8), do the shifting in u16
+            // and deliberately truncate back to u8 to discard the bits
+            // shifted out.
+            c |= (u16::from(next) << (8 - j)) as u8;
+        }
+
+        output
+            .write_char(ALPHABET[usize::from(c & 0x1f)] as char)
+            .expect("writing to a String cannot fail");
+    }
+
+    output
+}
+
+/// Maps a nixbase32-encoded character to its 5-bit value, which is also the
+/// index of the character in the alphabet.
+///
+/// Returns `None` for any byte that is not part of the alphabet.
+fn decode_char(encoded_char: u8) -> Option<u8> {
+    Some(match encoded_char {
+        b'0'..=b'9' => encoded_char - b'0',
+        b'a'..=b'd' => encoded_char - b'a' + 10,
+        b'f'..=b'n' => encoded_char - b'f' + 14,
+        b'p'..=b's' => encoded_char - b'p' + 23,
+        b'v'..=b'z' => encoded_char - b'v' + 27,
+        _ => return None,
+    })
+}
+
+/// Decodes nixbase32-encoded `input` into bytes.
+///
+/// The output has [`decode_len`]`(input.len())` bytes. Input bits that do not
+/// fit into the output must be zero.
+///
+/// # Errors
+///
+/// - [`Nixbase32DecodeError::CharacterNotInAlphabet`] if `input` contains a
+///   byte that is not part of the alphabet.
+/// - [`Nixbase32DecodeError::NonzeroCarry`] if any bit that does not fit into
+///   the output is set.
+pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> {
+    let output_len = decode_len(input.len());
+    let mut output: Vec<u8> = vec![0x00; output_len];
+
+    // loop over all characters in reverse, and keep the iteration count in n.
+    for (n, c) in input.iter().rev().enumerate() {
+        let c_decoded =
+            decode_char(*c).ok_or(Nixbase32DecodeError::CharacterNotInAlphabet(*c))?;
+
+        let b = n * 5; // bit offset within the entire output
+        let i = b / 8; // output byte index
+        let j = b % 8; // bit offset within that output byte
+
+        // Spread the 5 bits over (at most) two adjacent output bytes.
+        let [low, carry] = (u16::from(c_decoded) << j).to_le_bytes();
+
+        // For input lengths of 1, 3 or 6 modulo 8 (which `encode` never
+        // produces) even the low byte of the last character lies past the end
+        // of the output, so both halves are bounds-checked instead of indexed.
+        for (index, bits) in [(i, low), (i + 1, carry)] {
+            match output.get_mut(index) {
+                Some(byte) => *byte |= bits,
+                // set bits that don't fit into the output: invalid encoding.
+                None if bits != 0 => return Err(Nixbase32DecodeError::NonzeroCarry()),
+                None => {}
+            }
+        }
+    }
+
+    Ok(output)
+}
+
+/// Returns the decoded length of an input of length len.
+pub fn decode_len(len: usize) -> usize {
+    (len * 5) / 8
+}
+
+/// Returns the encoded length of an input of length len.
+pub fn encode_len(len: usize) -> usize {
+    if len == 0 {
+        return 0;
+    }
+    (len * 8 - 1) / 5 + 1
+}
+
+#[cfg(test)]
+mod tests {
+    use test_case::test_case;
+
+    #[test_case("", vec![] ; "empty bytes")]
+    #[test_case("0z", vec![0x1f]; "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "store path")]
+    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", vec![
+                 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
+                 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
+    ]; "sha256")]
+    fn encode(enc: &str, dec: Vec<u8>) {
+        assert_eq!(enc, super::encode(&dec));
+    }
+
+    #[test_case("", Some(vec![]) ; "empty bytes")]
+    #[test_case("0z", Some(vec![0x1f]); "one byte")]
+    #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![
+                 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+                 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "store path")]
+    #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(vec![
+                 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30,
+                 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30,
+    ]); "sha256")]
+    // this is invalid encoding, because it encodes 10 1-bits, so the carry
+    // would be 2 1-bits
+    #[test_case("zz", None; "invalid encoding-1")]
+    // this is an even more specific example - only a single carry bit is set,
+    // it'd decode as 10000000 with a carry of 01
+    #[test_case("c0", None; "invalid encoding-2")]
+    // lengths encode() never produces: the surplus character does not fit
+    // into the output at all, so it is only accepted if it is zero.
+    #[test_case("0", Some(vec![]); "surplus zero character")]
+    #[test_case("000", Some(vec![0x00]); "surplus zero character after one byte")]
+    #[test_case("z", None; "surplus nonzero character")]
+    #[test_case("z00", None; "surplus nonzero character after one byte")]
+    #[test_case("0u", None; "character not in alphabet")]
+    fn decode(enc: &str, dec: Option<Vec<u8>>) {
+        // `Some` means the decode needs to match, `None` means it needs to fail.
+        assert_eq!(dec, super::decode(enc.as_bytes()).ok());
+    }
+
+    #[test]
+    fn encode_len() {
+        assert_eq!(super::encode_len(20), 32)
+    }
+
+    #[test]
+    fn decode_len() {
+        assert_eq!(super::decode_len(32), 20)
+    }
+}
diff --git a/tvix/nix-compat/src/store_path.rs b/tvix/nix-compat/src/store_path.rs
new file mode 100644
index 000000000000..5032a73fb19b
--- /dev/null
+++ b/tvix/nix-compat/src/store_path.rs
@@ -0,0 +1,198 @@
+use crate::nixbase32::{self, Nixbase32DecodeError};
+use std::fmt;
+use thiserror::Error;
+
+/// Size of a store path digest, in bytes.
+pub const DIGEST_SIZE: usize = 20;
+/// Length of a nixbase32-encoded store path digest, in characters.
+// This is `nixbase32::encode_len(DIGEST_SIZE)`. It is hardcoded because
+// `encode_len` cannot be called in a const context; an assert in the tests
+// checks the value.
+pub const ENCODED_DIGEST_SIZE: usize = 32;
+
+/// The store dir prefix, without trailing slash.
+/// That's usually where the Nix store is mounted at.
+pub const STORE_DIR: &str = "/nix/store";
+/// The store dir prefix, with trailing slash.
+pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/";
+
+/// Errors that can occur when parsing a store path.
+#[derive(Debug, PartialEq, Eq, Error)]
+pub enum ParseStorePathError {
+    #[error("Dash is missing between hash and name")]
+    MissingDash(),
+    #[error("Hash encoding is invalid: {0}")]
+    InvalidHashEncoding(Nixbase32DecodeError),
+    #[error("Invalid name: {0}")]
+    InvalidName(String),
+    #[error("Tried to parse an absolute path which was missing the store dir prefix.")]
+    MissingStoreDir(),
+}
+
+/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
+///
+/// It starts with a digest (20 bytes), [nixbase32]-encoded, followed by
+/// a `-`, and ends with a `name`, which is a string, consisting only of ASCII
+/// alphanumeric characters, or one of the following characters: `-`, `_`, `.`,
+/// `+`, `?`, `=`.
+///
+/// The name is usually used to describe the pname and version of a package.
+/// Derivation paths can also be represented as store paths, they end
+/// with .drv.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct StorePath {
+    pub digest: [u8; DIGEST_SIZE],
+    pub name: String,
+}
+
+impl StorePath {
+    /// Parses a store path given without the store dir prefix, i.e. a string
+    /// of the form `<encoded digest>-<name>`.
+    ///
+    /// # Errors
+    ///
+    /// - [`ParseStorePathError::InvalidName`] if `s` is too short to contain a
+    ///   digest, a dash and a name, or if the name contains a character
+    ///   rejected by [`StorePath::validate_name`].
+    /// - [`ParseStorePathError::InvalidHashEncoding`] if the first
+    ///   [ENCODED_DIGEST_SIZE] bytes can't be decoded.
+    /// - [`ParseStorePathError::MissingDash`] if the digest is not followed by
+    ///   a `-`.
+    pub fn from_string(s: &str) -> Result<StorePath, ParseStorePathError> {
+        // the whole string needs to be at least:
+        //
+        // - 32 characters (encoded hash)
+        // - 1 dash
+        // - 1 character for the name
+        if s.len() < ENCODED_DIGEST_SIZE + 2 {
+            return Err(ParseStorePathError::InvalidName("".to_string()));
+        }
+
+        // Take the digest from the byte slice: `s` is not known to be ASCII
+        // yet, and slicing the &str inside a multi-byte character would panic.
+        let digest: [u8; DIGEST_SIZE] = nixbase32::decode(&s.as_bytes()[..ENCODED_DIGEST_SIZE])
+            .map_err(ParseStorePathError::InvalidHashEncoding)?
+            .try_into()
+            .expect("decoding ENCODED_DIGEST_SIZE characters yields DIGEST_SIZE bytes");
+
+        if s.as_bytes()[ENCODED_DIGEST_SIZE] != b'-' {
+            return Err(ParseStorePathError::MissingDash());
+        }
+
+        // The digest and the dash are ASCII, so this offset is a char
+        // boundary. The name starts right after the dash and is validated in
+        // full, including its first character.
+        let name = &s[ENCODED_DIGEST_SIZE + 1..];
+        StorePath::validate_name(name)?;
+
+        Ok(StorePath {
+            name: name.to_string(),
+            digest,
+        })
+    }
+
+    /// Construct a [StorePath] from an absolute store path string.
+    /// That is a string starting with the store prefix (/nix/store)
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseStorePathError::MissingStoreDir`] if `s` doesn't start
+    /// with [STORE_DIR_WITH_SLASH], and otherwise whatever
+    /// [`StorePath::from_string`] returns for the remainder.
+    pub fn from_absolute_path(s: &str) -> Result<StorePath, ParseStorePathError> {
+        s.strip_prefix(STORE_DIR_WITH_SLASH)
+            .ok_or(ParseStorePathError::MissingStoreDir())
+            .and_then(Self::from_string)
+    }
+
+    /// Converts the [StorePath] to an absolute store path string.
+    /// That is a string starting with the store prefix (/nix/store)
+    pub fn to_absolute_path(&self) -> String {
+        format!("{}/{}", STORE_DIR, self)
+    }
+
+    /// Checks a given &str to match the restrictions for store path names.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`ParseStorePathError::InvalidName`] carrying `s` if it
+    /// contains anything but ASCII alphanumerics and `-`, `_`, `.`, `+`, `?`,
+    /// `=`.
+    pub fn validate_name(s: &str) -> Result<(), ParseStorePathError> {
+        let is_allowed =
+            |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | '+' | '?' | '=');
+
+        if s.chars().all(is_allowed) {
+            Ok(())
+        } else {
+            Err(ParseStorePathError::InvalidName(s.to_string()))
+        }
+    }
+}
+
+/// Formats the store path as `<encoded digest>-<name>`, without the store dir.
+impl fmt::Display for StorePath {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixbase32;
+    use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE};
+
+    use super::{ParseStorePathError, StorePath};
+
+    #[test]
+    fn encoded_digest_size() {
+        assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE));
+    }
+
+    #[test]
+    fn happy_path() {
+        let example_nix_path_str =
+            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
+        let nixpath =
+            StorePath::from_string(example_nix_path_str).expect("Error parsing example string");
+
+        let expected_digest: [u8; DIGEST_SIZE] = [
+            0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+            0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00,
+        ];
+
+        assert_eq!("net-tools-1.60_p20170221182432", nixpath.name);
+        assert_eq!(nixpath.digest, expected_digest);
+
+        assert_eq!(example_nix_path_str, nixpath.to_string())
+    }
+
+    #[test]
+    fn invalid_hash_length() {
+        StorePath::from_string("00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn invalid_encoding_hash() {
+        StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    /// A multi-byte character straddling the end of the digest must be
+    /// reported as an error instead of panicking.
+    #[test]
+    fn non_ascii_at_digest_boundary() {
+        assert_eq!(
+            ParseStorePathError::InvalidHashEncoding(
+                nixbase32::Nixbase32DecodeError::CharacterNotInAlphabet(0xc3)
+            ),
+            StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48ak44l\u{e9}-net-tools")
+                .expect_err("must fail")
+        );
+    }
+
+    #[test]
+    fn more_than_just_the_bare_nix_store_path() {
+        StorePath::from_string(
+            "00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp",
+        )
+        .expect_err("No error raised.");
+    }
+
+    /// The first character of the name has to be validated, too.
+    #[test]
+    fn invalid_first_name_character() {
+        assert_eq!(
+            ParseStorePathError::InvalidName("/net-tools".to_string()),
+            StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48ak44la-/net-tools")
+                .expect_err("must fail")
+        );
+    }
+
+    #[test]
+    fn no_dash_between_hash_and_name() {
+        StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn absolute_path() {
+        let example_nix_path_str =
+            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
+        let nixpath_expected = StorePath::from_string(example_nix_path_str).expect("must parse");
+
+        let nixpath_actual = StorePath::from_absolute_path(
+            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
+        )
+        .expect("must parse");
+
+        assert_eq!(nixpath_expected, nixpath_actual);
+
+        assert_eq!(
+            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
+            nixpath_actual.to_absolute_path(),
+        );
+    }
+
+    #[test]
+    fn absolute_path_missing_prefix() {
+        assert_eq!(
+            ParseStorePathError::MissingStoreDir(),
+            StorePath::from_absolute_path("foobar-123").expect_err("must fail")
+        );
+    }
+}
-- cgit 1.4.1