about summary refs log tree commit diff
path: root/tvix/nix-compat/src/store_path
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-03-30T12·01+0200
committerclbot <clbot@tvl.fyi>2023-03-30T14·03+0000
commit5f2c2e79e1778cda877d6122dd24be08740c8720 (patch)
treeb87dba0063a193a39c1bf57a2b1e3513039e144c /tvix/nix-compat/src/store_path
parent4ab180b1ebf411d7d7ebd98ae17c25945b17e2c2 (diff)
refactor(tvix/nix-compat): move build_store_path out of derivation r/6060
This doesn't have anything to do with ATerms, we just happen to be using
the aterm representation of a Derivation as contents.

Moving this into store_path/utils.rs makes these things much cleaner -
Have a build_store_path_from_references function, and a
build_store_path_from_fingerprint helper function that makes use of it.

build_store_path_from_references is invoked from the derivation module
which can be used to calculate the derivation path.

In the derivation module, we also invoke
build_store_path_from_fingerprint during the output path calculation.

Change-Id: Ia8d61a5e8e5d3f396f93593676ed3f5d1a3f1d66
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8367
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/nix-compat/src/store_path')
-rw-r--r--tvix/nix-compat/src/store_path/mod.rs203
-rw-r--r--tvix/nix-compat/src/store_path/utils.rs118
2 files changed, 321 insertions, 0 deletions
diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs
new file mode 100644
index 000000000000..0e004ccd7a10
--- /dev/null
+++ b/tvix/nix-compat/src/store_path/mod.rs
@@ -0,0 +1,203 @@
+use crate::nixbase32::{self, Nixbase32DecodeError};
+use std::fmt;
+use thiserror::Error;
+
+mod utils;
+
+pub use utils::{
+    build_store_path_from_fingerprint, build_store_path_from_references, compress_hash,
+    hash_placeholder,
+};
+
+pub const DIGEST_SIZE: usize = 20;
+// lazy_static doesn't allow us to call NIXBASE32.encode_len(), so we ran it
+// manually and have an assert in the tests.
+pub const ENCODED_DIGEST_SIZE: usize = 32;
+
+// The store dir prefix, without trailing slash.
+// That's usually where the Nix store is mounted at.
+pub const STORE_DIR: &str = "/nix/store";
+pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/";
+
+/// Errors that can occur during the validation of name characters.
+#[derive(Debug, PartialEq, Eq, Error)]
+pub enum Error {
+    #[error("Dash is missing between hash and name")]
+    MissingDash(),
+    #[error("Hash encoding is invalid: {0}")]
+    InvalidHashEncoding(Nixbase32DecodeError),
+    #[error("Invalid name: {0}")]
+    InvalidName(String),
+    #[error("Tried to parse an absolute path which was missing the store dir prefix.")]
+    MissingStoreDir(),
+}
+
+/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
+///
+/// It starts with a digest (20 bytes), [crate::nixbase32]-encoded,
+/// followed by a `-`, and ends with a `name`, which is a string, consisting only of ASCCI
+/// alphanumeric characters, or one of the following characters: `-`, `_`, `.`,
+/// `+`, `?`, `=`.
+///
+/// The name is usually used to describe the pname and version of a package.
+/// Derivations paths can also be represented as store paths, they end
+/// with .drv.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct StorePath {
+    pub digest: [u8; DIGEST_SIZE],
+    pub name: String,
+}
+
+impl StorePath {
+    pub fn from_string(s: &str) -> Result<StorePath, Error> {
+        // the whole string needs to be at least:
+        //
+        // - 32 characters (encoded hash)
+        // - 1 dash
+        // - 1 character for the name
+        if s.len() < ENCODED_DIGEST_SIZE + 2 {
+            return Err(Error::InvalidName("".to_string()));
+        }
+
+        let digest = match nixbase32::decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) {
+            Ok(decoded) => decoded,
+            Err(decoder_error) => return Err(Error::InvalidHashEncoding(decoder_error)),
+        };
+
+        if s.as_bytes()[ENCODED_DIGEST_SIZE] != b'-' {
+            return Err(Error::MissingDash());
+        }
+
+        StorePath::validate_name(&s[ENCODED_DIGEST_SIZE + 2..])?;
+
+        Ok(StorePath {
+            name: s[ENCODED_DIGEST_SIZE + 1..].to_string(),
+            digest: digest.try_into().expect("size is known"),
+        })
+    }
+
+    /// Construct a [StorePath] from an absolute store path string.
+    /// That is a string starting with the store prefix (/nix/store)
+    pub fn from_absolute_path(s: &str) -> Result<StorePath, Error> {
+        match s.strip_prefix(STORE_DIR_WITH_SLASH) {
+            Some(s_stripped) => Self::from_string(s_stripped),
+            None => Err(Error::MissingStoreDir()),
+        }
+    }
+
+    /// Converts the [StorePath] to an absolute store path string.
+    /// That is a string starting with the store prefix (/nix/store)
+    pub fn to_absolute_path(&self) -> String {
+        format!("{}/{}", STORE_DIR, self)
+    }
+
+    /// Checks a given &str to match the restrictions for store path names.
+    pub fn validate_name(s: &str) -> Result<(), Error> {
+        for c in s.chars() {
+            if c.is_ascii_alphanumeric()
+                || c == '-'
+                || c == '_'
+                || c == '.'
+                || c == '+'
+                || c == '?'
+                || c == '='
+            {
+                continue;
+            }
+
+            return Err(Error::InvalidName(s.to_string()));
+        }
+
+        Ok(())
+    }
+}
+
+impl fmt::Display for StorePath {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixbase32;
+    use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE};
+
+    use super::{Error, StorePath};
+
+    #[test]
+    fn encoded_digest_size() {
+        assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE));
+    }
+
+    #[test]
+    fn happy_path() {
+        let example_nix_path_str =
+            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
+        let nixpath =
+            StorePath::from_string(example_nix_path_str).expect("Error parsing example string");
+
+        let expected_digest: [u8; DIGEST_SIZE] = [
+            0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a,
+            0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00,
+        ];
+
+        assert_eq!("net-tools-1.60_p20170221182432", nixpath.name);
+        assert_eq!(nixpath.digest, expected_digest);
+
+        assert_eq!(example_nix_path_str, nixpath.to_string())
+    }
+
+    #[test]
+    fn invalid_hash_length() {
+        StorePath::from_string("00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn invalid_encoding_hash() {
+        StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn more_than_just_the_bare_nix_store_path() {
+        StorePath::from_string(
+            "00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp",
+        )
+        .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn no_dash_between_hash_and_name() {
+        StorePath::from_string("00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432")
+            .expect_err("No error raised.");
+    }
+
+    #[test]
+    fn absolute_path() {
+        let example_nix_path_str =
+            "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432";
+        let nixpath_expected = StorePath::from_string(example_nix_path_str).expect("must parse");
+
+        let nixpath_actual = StorePath::from_absolute_path(
+            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
+        )
+        .expect("must parse");
+
+        assert_eq!(nixpath_expected, nixpath_actual);
+
+        assert_eq!(
+            "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432",
+            nixpath_actual.to_absolute_path(),
+        );
+    }
+
+    #[test]
+    fn absolute_path_missing_prefix() {
+        assert_eq!(
+            Error::MissingStoreDir(),
+            StorePath::from_absolute_path("foobar-123").expect_err("must fail")
+        );
+    }
+}
diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs
new file mode 100644
index 000000000000..0d96414a212d
--- /dev/null
+++ b/tvix/nix-compat/src/store_path/utils.rs
@@ -0,0 +1,118 @@
+use crate::nixbase32;
+use crate::store_path::StorePath;
+use crate::texthash::text_hash_string;
+use sha2::{Digest, Sha256};
+
+use super::Error;
+
+/// compress_hash takes an arbitrarily long sequence of bytes (usually
+/// a hash digest), and returns a sequence of bytes of length
+/// output_size.
+///
+/// It's calculated by rotating through the bytes in the output buffer
+/// (zero- initialized), and XOR'ing with each byte of the passed
+/// input. It consumes 1 byte at a time, and XOR's it with the current
+/// value in the output buffer.
+///
+/// This mimics equivalent functionality in C++ Nix.
+pub fn compress_hash(input: &[u8], output_size: usize) -> Vec<u8> {
+    let mut output: Vec<u8> = vec![0; output_size];
+
+    for (ii, ch) in input.iter().enumerate() {
+        output[ii % output_size] ^= ch;
+    }
+
+    output
+}
+
+/// This builds a store path, by calculating the text_hash_string of either a
+/// derivation or a literal text file that may contain references.
+pub fn build_store_path_from_references<
+    S: AsRef<str>,
+    I: IntoIterator<Item = S>,
+    C: AsRef<[u8]>,
+>(
+    name: &str,
+    content: C,
+    references: I,
+) -> Result<StorePath, Error> {
+    let text_hash_str = text_hash_string(name, content, references);
+    build_store_path_from_fingerprint(name, &text_hash_str)
+}
+
+/// This builds a store path from a fingerprint.
+/// Usually, that function is used from [build_store_path_from_references] and
+/// passed a "text hash string" (starting with "text:" as fingerprint),
+/// but other fingerprints starting with "output:" are also used in Derivation
+/// output path calculation.
+///
+/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes,
+/// and nixbase32-encoded (32 characters).
+pub fn build_store_path_from_fingerprint(
+    name: &str,
+    fingerprint: &str,
+) -> Result<StorePath, Error> {
+    let digest = {
+        let hasher = Sha256::new_with_prefix(fingerprint);
+        hasher.finalize()
+    };
+    let compressed = compress_hash(&digest, 20);
+    StorePath::from_string(format!("{}-{}", nixbase32::encode(&compressed), name).as_str())
+}
+
+/// Nix placeholders (i.e. values returned by `builtins.placeholder`)
+/// are used to populate outputs with paths that must be
+/// string-replaced with the actual placeholders later, at runtime.
+///
+/// The actual placeholder is basically just a SHA256 hash encoded in
+/// cppnix format.
+pub fn hash_placeholder(name: &str) -> String {
+    let digest = {
+        let mut hasher = Sha256::new();
+        hasher.update(format!("nix-output:{}", name));
+        hasher.finalize()
+    };
+
+    format!("/{}", nixbase32::encode(&digest))
+}
+
+#[cfg(test)]
+mod test {
+    use crate::store_path::build_store_path_from_references;
+
+    #[test]
+    fn build_store_path_with_zero_references() {
+        // This hash should match `builtins.toFile`, e.g.:
+        //
+        // nix-repl> builtins.toFile "foo" "bar"
+        // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo"
+
+        let store_path = build_store_path_from_references("foo", "bar", Vec::<String>::new())
+            .expect("build_store_path() should succeed");
+
+        assert_eq!(
+            store_path.to_absolute_path().as_str(),
+            "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo"
+        );
+    }
+
+    #[test]
+    fn build_store_path_with_non_zero_references() {
+        // This hash should match:
+        //
+        // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}"
+        // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
+
+        let inner = build_store_path_from_references("foo", "bar", Vec::<String>::new())
+            .expect("path_with_references() should succeed");
+        let inner_path = inner.to_absolute_path();
+
+        let outer = build_store_path_from_references("baz", &inner_path, vec![inner_path.as_str()])
+            .expect("path_with_references() should succeed");
+
+        assert_eq!(
+            outer.to_absolute_path().as_str(),
+            "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
+        );
+    }
+}