diff options
Diffstat (limited to 'tvix/nix-compat/src/store_path')
-rw-r--r-- | tvix/nix-compat/src/store_path/mod.rs | 328 | ||||
-rw-r--r-- | tvix/nix-compat/src/store_path/utils.rs | 312 |
2 files changed, 640 insertions, 0 deletions
diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs new file mode 100644 index 000000000000..d3317b67f62e --- /dev/null +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -0,0 +1,328 @@ +use crate::nixbase32::{self, Nixbase32DecodeError}; +use data_encoding::BASE64; +use std::{fmt, path::PathBuf, str::FromStr}; +use thiserror; + +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStringExt; + +mod utils; + +pub use utils::*; + +pub const DIGEST_SIZE: usize = 20; +// lazy_static doesn't allow us to call NIXBASE32.encode_len(), so we ran it +// manually and have an assert in the tests. +pub const ENCODED_DIGEST_SIZE: usize = 32; + +// The store dir prefix, without trailing slash. +// That's usually where the Nix store is mounted at. +pub const STORE_DIR: &str = "/nix/store"; +pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/"; + +/// Errors that can occur when parsing a literal store path +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum Error { + #[error("Dash is missing between hash and name")] + MissingDash(), + #[error("Hash encoding is invalid: {0}")] + InvalidHashEncoding(Nixbase32DecodeError), + #[error("Invalid length")] + InvalidLength(), + #[error( + "Invalid name: \"{}\", character at position {} is invalid", + std::str::from_utf8(&.0).unwrap_or(&BASE64.encode(.0)), + .1, + )] + InvalidName(Vec<u8>, usize), + #[error("Tried to parse an absolute path which was missing the store dir prefix.")] + MissingStoreDir(), +} + +/// Represents a path in the Nix store (a direct child of [STORE_DIR]). +/// +/// It consists of a digest (20 bytes), and a name, which is a string. +/// The name may only contain ASCII alphanumeric, or one of the following +/// characters: `-`, `_`, `.`, `+`, `?`, `=`. A leading `.` is rejected. +/// The name is usually used to describe the pname and version of a package. +/// Derivation paths can also be represented as store paths, their names just +/// end with the `.drv` suffix. 
+/// +/// A [StorePath] does not encode any additional subpath "inside" the store +/// path. NOTE(review): the derived `PartialEq`/`Eq` compare `digest` and `name`, but the manual `PartialOrd`/`Ord` impls compare `digest` only — confirm this mismatch is intended. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct StorePath { + pub digest: [u8; DIGEST_SIZE], + pub name: String, +} + +impl PartialOrd for StorePath { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.digest.partial_cmp(&other.digest) + } +} + +impl Ord for StorePath { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.digest.cmp(&other.digest) + } +} + +impl FromStr for StorePath { + type Err = Error; + + /// Construct a [StorePath] by passing the `$digest-$name` string + /// that comes after [STORE_DIR_WITH_SLASH]. + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::from_bytes(s.as_bytes()) + } +} + +impl StorePath { + /// Construct a [StorePath] by passing the `$digest-$name` string + /// that comes after [STORE_DIR_WITH_SLASH]. + pub fn from_bytes(s: &[u8]) -> Result<StorePath, Error> { + // the whole string needs to be at least: + // + // - 32 characters (encoded hash) + // - 1 dash + // - 1 character for the name + if s.len() < ENCODED_DIGEST_SIZE + 2 { + Err(Error::InvalidLength())? + } + + let digest = match nixbase32::decode(&s[..ENCODED_DIGEST_SIZE]) { + Ok(decoded) => decoded, + Err(decoder_error) => return Err(Error::InvalidHashEncoding(decoder_error)), + }; + + if s[ENCODED_DIGEST_SIZE] != b'-' { + return Err(Error::MissingDash()); + } + + Ok(StorePath { + name: validate_name(&s[ENCODED_DIGEST_SIZE + 1..])?, + digest: digest.try_into().expect("size is known"), + }) + } + + /// Construct a [StorePath] from an absolute store path string. + /// This is equivalent to calling [StorePath::from_bytes], but stripping the + /// [STORE_DIR_WITH_SLASH] prefix before. 
+ pub fn from_absolute_path(s: &[u8]) -> Result<StorePath, Error> { + match s.strip_prefix(STORE_DIR_WITH_SLASH.as_bytes()) { + Some(s_stripped) => Self::from_bytes(s_stripped), + None => Err(Error::MissingStoreDir()), + } + } + + /// Decompose a string into a [StorePath] and a [PathBuf] containing the + /// rest of the path, or an error. + #[cfg(target_family = "unix")] + pub fn from_absolute_path_full(s: &str) -> Result<(StorePath, PathBuf), Error> { + // strip [STORE_DIR_WITH_SLASH] from s + match s.strip_prefix(STORE_DIR_WITH_SLASH) { + None => Err(Error::MissingStoreDir()), + Some(rest) => { + // put rest in a PathBuf + let mut p = PathBuf::new(); + p.push(rest); + + let mut it = p.components(); + + // The first component of the rest must be parse-able as a [StorePath] + if let Some(first_component) = it.next() { + // convert first component to StorePath + let first_component_bytes = first_component.as_os_str().to_owned().into_vec(); + let store_path = StorePath::from_bytes(&first_component_bytes)?; + // collect rest + let rest_buf: PathBuf = it.collect(); + Ok((store_path, rest_buf)) + } else { + Err(Error::InvalidLength()) // Well, or missing "/"? + } + } + } + } + + /// Converts the [StorePath] to an absolute store path string. + /// That is just the string representation, prefixed with the store prefix + /// ([STORE_DIR_WITH_SLASH]), + pub fn to_absolute_path(&self) -> String { + format!("{}{}", STORE_DIR_WITH_SLASH, self) + } +} + +/// Checks a given &[u8] to match the restrictions for [StorePath::name], and +/// returns the name as string if successful. +pub(crate) fn validate_name(s: &[u8]) -> Result<String, Error> { + // Empty names are not allowed. + if s.is_empty() { + return Err(Error::InvalidLength()); + } + + for (i, c) in s.iter().enumerate() { + if c.is_ascii_alphanumeric() + || (*c == b'.' && i != 0) // can't start with a dot + || *c == b'-' + || *c == b'_' + || *c == b'+' + || *c == b'?' 
+ || *c == b'=' + { + continue; + } + + return Err(Error::InvalidName(s.to_vec(), i)); + } + + Ok(String::from_utf8(s.to_vec()).unwrap()) +} + +impl fmt::Display for StorePath { + /// The string representation of a store path starts with a digest (20 + /// bytes), [crate::nixbase32]-encoded, followed by a `-`, + /// and ends with the name. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::nixbase32; + use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE}; + use test_case::test_case; + + use super::{Error, StorePath}; + + #[test] + fn encoded_digest_size() { + assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE)); + } + + #[test] + fn happy_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath = StorePath::from_bytes(example_nix_path_str.as_bytes()) + .expect("Error parsing example string"); + + let expected_digest: [u8; DIGEST_SIZE] = [ + 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, + 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00, + ]; + + assert_eq!("net-tools-1.60_p20170221182432", nixpath.name); + assert_eq!(nixpath.digest, expected_digest); + + assert_eq!(example_nix_path_str, nixpath.to_string()) + } + + /// This is the store path rejected when `nix-store --add`'ing an + /// empty `.gitignore` file. + /// + /// Nix 2.4 accidentally dropped this behaviour, but this is considered a bug. + /// See https://github.com/NixOS/nix/pull/9095. 
+ #[test] + fn starts_with_dot() { + StorePath::from_bytes(b"fli4bwscgna7lpm7v5xgnjxrxh0yc7ra-.gitignore") + .expect_err("must fail"); + } + + #[test] + fn invalid_hash_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn invalid_encoding_hash() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn more_than_just_the_bare_nix_store_path() { + StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp", + ) + .expect_err("must fail"); + } + + #[test] + fn no_dash_between_hash_and_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn absolute_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath_expected = + StorePath::from_bytes(example_nix_path_str.as_bytes()).expect("must parse"); + + let nixpath_actual = StorePath::from_absolute_path( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432".as_bytes(), + ) + .expect("must parse"); + + assert_eq!(nixpath_expected, nixpath_actual); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + nixpath_actual.to_absolute_path(), + ); + } + + #[test] + fn absolute_path_missing_prefix() { + assert_eq!( + Error::MissingStoreDir(), + StorePath::from_absolute_path(b"foobar-123").expect_err("must fail") + ); + } + + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/", + 
(StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix, but trailing slash")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp")) + ; "with prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp/", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp/")) + ; "with prefix and trailing slash")] + fn from_absolute_path_full(s: &str, expected: (StorePath, PathBuf)) { + let actual = StorePath::from_absolute_path_full(s).expect("must succeed"); + assert_eq!(expected, actual); + } + + #[test] + fn from_absolute_path_errors() { + assert_eq!( + Error::InvalidLength(), + StorePath::from_absolute_path_full("/nix/store/").expect_err("must fail") + ); + assert_eq!( + Error::InvalidLength(), + StorePath::from_absolute_path_full("/nix/store/foo").expect_err("must fail") + ); + assert_eq!( + Error::MissingStoreDir(), + StorePath::from_absolute_path_full( + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432" + ) + .expect_err("must fail") + ); + } +} diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs new file mode 100644 index 000000000000..ddece96e63f7 --- /dev/null +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -0,0 +1,312 @@ +use super::{Error, STORE_DIR}; +use crate::nixbase32; +use crate::nixhash::{HashAlgo, NixHash, NixHashWithMode}; +use crate::store_path::StorePath; +use sha2::{Digest, Sha256}; +use thiserror; + +/// Errors that can occur when creating a content-addressed store path. +/// +/// This wraps the main [crate::store_path::Error].. 
+#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum BuildStorePathError { + #[error("Invalid Store Path: {0}")] + InvalidStorePath(Error), + /// This error occurs when we have references outside the SHA-256 + + /// Recursive case. The restriction comes from upstream Nix. It may be + /// lifted at some point but there isn't a pressing need to anticipate that. + #[error("References were not supported as much as requested")] + InvalidReference(), +} + +/// compress_hash takes an arbitrarily long sequence of bytes (usually +/// a hash digest), and returns a sequence of bytes of length +/// OUTPUT_SIZE. Panics if OUTPUT_SIZE is 0 and `input` is non-empty (remainder by zero). +/// +/// It's calculated by rotating through the bytes in the output buffer +/// (zero-initialized), and XOR'ing with each byte of the passed +/// input. It consumes 1 byte at a time, and XOR's it with the current +/// value in the output buffer. +/// +/// This mimics equivalent functionality in C++ Nix. +pub fn compress_hash<const OUTPUT_SIZE: usize>(input: &[u8]) -> [u8; OUTPUT_SIZE] { + let mut output = [0; OUTPUT_SIZE]; + + for (ii, ch) in input.iter().enumerate() { + output[ii % OUTPUT_SIZE] ^= ch; + } + + output +} + +/// This builds a store path, by calculating the text_hash_string of either a +/// derivation or a literal text file that may contain references. +pub fn build_text_path<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>( + name: &str, + content: C, + references: I, +) -> Result<StorePath, Error> { + build_store_path_from_fingerprint_parts( + &make_type("text", references, false), + // the nix_hash_string representation of the sha256 digest of some contents + &{ + let content_digest = { + let hasher = Sha256::new_with_prefix(content); + hasher.finalize() + }; + + // We populate the struct directly, as we know the sha256 digest has the + // right size. 
+ NixHash { + algo: crate::nixhash::HashAlgo::Sha256, + digest: content_digest.to_vec(), + } + }, + name, + ) +} + +/// This builds a more "regular" content-addressed store path +pub fn build_regular_ca_path<S: AsRef<str>, I: IntoIterator<Item = S>>( + name: &str, + hash_with_mode: &NixHashWithMode, + references: I, + self_reference: bool, +) -> Result<StorePath, BuildStorePathError> { + match &hash_with_mode { + NixHashWithMode::Recursive( + ref hash @ NixHash { + algo: HashAlgo::Sha256, + .. + }, + ) => build_store_path_from_fingerprint_parts( + &make_type("source", references, self_reference), + hash, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath), + _ => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + if self_reference { + return Err(BuildStorePathError::InvalidReference()); + } + build_store_path_from_fingerprint_parts( + "output:out", + &{ + let content_digest = { + let mut hasher = Sha256::new_with_prefix("fixed:out:"); + hasher.update(hash_with_mode.mode().prefix()); + hasher.update(hash_with_mode.digest().algo.to_string()); + hasher.update(":"); + hasher.update( + &data_encoding::HEXLOWER.encode(&hash_with_mode.digest().digest), + ); + hasher.update(":"); + hasher.finalize() + }; + + // We don't use [NixHash::from_algo_and_digest], as we know [Sha256] has + // the right digest size. + NixHash { + algo: crate::nixhash::HashAlgo::Sha256, + digest: content_digest.to_vec(), + } + }, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath) + } + } +} + +/// For given NAR sha256 digest and name, return the new [StorePath] this would have. +pub fn build_nar_based_store_path(nar_sha256_digest: &[u8; 32], name: &str) -> StorePath { + // We populate the struct directly, as we know the sha256 digest has the + // right size. 
+ let nar_hash_with_mode = NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha256, + digest: nar_sha256_digest.to_vec(), + }); + + build_regular_ca_path(name, &nar_hash_with_mode, Vec::<String>::new(), false).unwrap() +} + +/// This builds an input-addressed store path +/// +/// Input-addressed store paths are always derivation outputs, the "input" in question is the +/// derivation and its closure. +pub fn build_output_path( + drv_hash: &NixHash, + output_name: &str, + output_path_name: &str, +) -> Result<StorePath, Error> { + build_store_path_from_fingerprint_parts( + &(String::from("output:") + output_name), + drv_hash, + output_path_name, + ) +} + +/// This builds a store path from fingerprint parts. +/// Usually, that function is used from [build_text_path] and +/// passed a "text hash string" (starting with "text:" as fingerprint), +/// but other fingerprints starting with "output:" are also used in Derivation +/// output path calculation. +/// +/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, +/// and stored as the [StorePath] digest (32 characters once nixbase32-encoded). Fails if `name` does not pass [super::validate_name]. +fn build_store_path_from_fingerprint_parts( + ty: &str, + hash: &NixHash, + name: &str, +) -> Result<StorePath, Error> { + let fingerprint = + String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + name; + let digest = { + let hasher = Sha256::new_with_prefix(fingerprint); + hasher.finalize() + }; + let compressed = compress_hash::<20>(&digest); + super::validate_name(name.as_bytes())?; + Ok(StorePath { + digest: compressed, + name: name.to_string(), + }) +} + +/// This contains the Nix logic to create "text hash strings", which are used +/// in `builtins.toFile`, as well as in Derivation Path calculation. 
+/// +/// A text hash is calculated by concatenating the following fields, separated by a `:`: +/// +/// - text (the `ty` argument; this function only produces this field and the references, plus a trailing `:self` when `self_ref` is set — the remaining fields are not added here) +/// - references, individually joined by `:` +/// - the nix_hash_string representation of the sha256 digest of some contents +/// - the value of `storeDir` +/// - the name +fn make_type<S: AsRef<str>, I: IntoIterator<Item = S>>( + ty: &str, + references: I, + self_ref: bool, +) -> String { + let mut s = String::from(ty); + + for reference in references { + s.push(':'); + s.push_str(reference.as_ref()); + } + + if self_ref { + s.push_str(":self"); + } + + s +} + +/// Nix placeholders (i.e. values returned by `builtins.placeholder`) +/// are used to populate outputs with paths that must be +/// string-replaced with the actual placeholders later, at runtime. +/// +/// The actual placeholder is basically just a SHA256 hash encoded in +/// cppnix format. +pub fn hash_placeholder(name: &str) -> String { + let digest = { + let mut hasher = Sha256::new(); + hasher.update(format!("nix-output:{}", name)); + hasher.finalize() + }; + + format!("/{}", nixbase32::encode(&digest)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::nixhash::{NixHash, NixHashWithMode}; + + #[test] + fn build_text_path_with_zero_references() { + // This hash should match `builtins.toFile`, e.g.: + // + // nix-repl> builtins.toFile "foo" "bar" + // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + + let store_path = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("build_store_path() should succeed"); + + assert_eq!( + store_path.to_absolute_path().as_str(), + "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + ); + } + + #[test] + fn build_text_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + + let inner = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("path_with_references() should succeed"); + let 
inner_path = inner.to_absolute_path(); + + let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()]) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + ); + } + + #[test] + fn build_sha1_path() { + let outer = build_regular_ca_path( + "bar", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha1, + digest: data_encoding::HEXLOWER + .decode(b"0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + .expect("hex should decode"), + }), + Vec::<String>::new(), + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + ); + } + + #[test] + fn build_store_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + // + // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz + // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz' + let outer = build_regular_ca_path( + "baz", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha256, + digest: nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1") + .expect("hex should decode"), + }), + vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"], + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz" + ); + } +} |