about summary refs log tree commit diff
path: root/tvix/nix-compat/src/nar
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/nix-compat/src/nar')
-rw-r--r--tvix/nix-compat/src/nar/listing/mod.rs128
-rw-r--r--tvix/nix-compat/src/nar/listing/test.rs59
-rw-r--r--tvix/nix-compat/src/nar/mod.rs1
-rw-r--r--tvix/nix-compat/src/nar/reader/mod.rs4
-rw-r--r--tvix/nix-compat/src/nar/tests/nixos-release.ls1
-rw-r--r--tvix/nix-compat/src/nar/wire/mod.rs8
-rw-r--r--tvix/nix-compat/src/nar/writer/sync.rs91
7 files changed, 276 insertions, 16 deletions
diff --git a/tvix/nix-compat/src/nar/listing/mod.rs b/tvix/nix-compat/src/nar/listing/mod.rs
new file mode 100644
index 000000000000..5a9a3b4d3613
--- /dev/null
+++ b/tvix/nix-compat/src/nar/listing/mod.rs
@@ -0,0 +1,128 @@
+//! Parser for the Nix archive listing format, aka .ls.
+//!
+//! LS files are produced by the C++ Nix implementation via `write-nar-listing=1` query parameter
+//! passed to a store implementation when transferring store paths.
+//!
+//! Listing files contain metadata about each file and its offset in the corresponding NAR.
+//!
+//! NOTE: LS entries do not offer any integrity field to validate the retrieved file at the provided
+//! offset. Validating the contents is the caller's responsibility.
+
+use std::{
+    collections::HashMap,
+    path::{Component, Path},
+};
+
+use serde::Deserialize;
+
+#[cfg(test)]
+mod test;
+
+#[derive(Debug, thiserror::Error)]
+pub enum ListingError {
+    // TODO: add an enum of what component was problematic
+    // reusing `std::path::Component` is not possible as it contains a lifetime.
+    /// An unsupported path component can be one of:
+    /// - a Windows prefix (`C:\\`, `\\share\\`)
+    /// - a parent directory (`..`)
+    /// - a root directory (`/`)
+    #[error("unsupported path component")]
+    UnsupportedPathComponent,
+    #[error("invalid encoding for entry component")]
+    InvalidEncoding,
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum ListingEntry {
+    Regular {
+        size: u64,
+        #[serde(default)]
+        executable: bool,
+        #[serde(rename = "narOffset")]
+        nar_offset: u64,
+    },
+    Directory {
+        // It's tempting to think that the key should be a `Vec<u8>`,
+        // but Nix does not support that and will fail to emit a version 1 listing for any
+        // string that cannot be encoded as UTF-8.
+        entries: HashMap<String, ListingEntry>,
+    },
+    Symlink {
+        target: String,
+    },
+}
+
+impl ListingEntry {
+    /// Given a relative path without `..` component, this will locate, relative to this entry, a
+    /// deeper entry.
+    ///
+    /// If the path is invalid, a listing error [`ListingError`] will be returned.
+    /// If the entry cannot be found, `None` will be returned.
+    pub fn locate<P: AsRef<Path>>(&self, path: P) -> Result<Option<&ListingEntry>, ListingError> {
+        // Walk down the tree one path component at a time,
+        // while rejecting dangerous components, e.g. `..` or `/`.
+        // Files and symlinks are *leaves*: descending past one yields `Ok(None)`.
+        let mut cur = self;
+        for component in path.as_ref().components() {
+            match component {
+                Component::CurDir => continue,
+                Component::RootDir | Component::Prefix(_) | Component::ParentDir => {
+                    return Err(ListingError::UnsupportedPathComponent)
+                }
+                Component::Normal(file_or_dir_name) => {
+                    if let Self::Directory { entries } = cur {
+                        // As Nix cannot encode non-UTF8 components in the listing (see comment on
+                        // the `Directory` enum variant), path components with an invalid encoding
+                        // are errors.
+                        let entry_name = file_or_dir_name
+                            .to_str()
+                            .ok_or(ListingError::InvalidEncoding)?;
+
+                        if let Some(new_entry) = entries.get(entry_name) {
+                            cur = new_entry;
+                        } else {
+                            return Ok(None);
+                        }
+                    } else {
+                        return Ok(None);
+                    }
+                }
+            }
+        }
+
+        // By construction, we found the node that corresponds to the path traversal.
+        Ok(Some(cur))
+    }
+}
+
+#[derive(Debug)]
+pub struct ListingVersion<const V: u8>;
+
+#[derive(Debug, thiserror::Error)]
+#[error("Invalid version: {0}")]
+struct ListingVersionError(u8);
+
+impl<'de, const V: u8> Deserialize<'de> for ListingVersion<V> {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let value = u8::deserialize(deserializer)?;
+        if value == V {
+            Ok(ListingVersion::<V>)
+        } else {
+            Err(serde::de::Error::custom(ListingVersionError(value)))
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+#[non_exhaustive]
+pub enum Listing {
+    V1 {
+        root: ListingEntry,
+        version: ListingVersion<1>,
+    },
+}
diff --git a/tvix/nix-compat/src/nar/listing/test.rs b/tvix/nix-compat/src/nar/listing/test.rs
new file mode 100644
index 000000000000..5b2ac3f166fe
--- /dev/null
+++ b/tvix/nix-compat/src/nar/listing/test.rs
@@ -0,0 +1,59 @@
+use std::{collections::HashMap, path::PathBuf, str::FromStr};
+
+use crate::nar;
+
+#[test]
+fn weird_paths() {
+    let root = nar::listing::ListingEntry::Directory {
+        entries: HashMap::new(),
+    };
+
+    root.locate("../../../../etc/passwd")
+        .expect_err("Failed to reject `../` fragment in a path during traversal");
+
+    // Gated on Windows as C:\\ is parsed as `Component::Normal(_)` on Linux.
+    #[cfg(target_os = "windows")]
+    root.locate("C:\\\\Windows\\System32")
+        .expect_err("Failed to reject Windows-style prefixes");
+
+    root.locate("/etc/passwd")
+        .expect_err("Failed to reject absolute UNIX paths");
+}
+
+#[test]
+fn nixos_release() {
+    let listing_bytes = include_bytes!("../tests/nixos-release.ls");
+    let listing: nar::listing::Listing = serde_json::from_slice(listing_bytes).unwrap();
+
+    let nar::listing::Listing::V1 { root, .. } = listing;
+    assert!(matches!(root, nar::listing::ListingEntry::Directory { .. }));
+
+    let build_products = root
+        .locate(PathBuf::from_str("nix-support/hydra-build-products").unwrap())
+        .expect("Failed to locate a known file in a directory")
+        .expect("File was unexpectedly not found in the listing");
+
+    assert!(matches!(
+        build_products,
+        nar::listing::ListingEntry::Regular { .. }
+    ));
+
+    let nonexisting_file = root
+        .locate(PathBuf::from_str("nix-support/does-not-exist").unwrap())
+        .expect("Failed to locate an unknown file in a directory");
+
+    assert!(
+        nonexisting_file.is_none(),
+        "Non-existing file was unexpectedly found in the listing"
+    );
+
+    let existing_dir = root
+        .locate(PathBuf::from_str("nix-support").unwrap())
+        .expect("Failed to locate a known directory in a directory")
+        .expect("Directory was expectedly found in the listing");
+
+    assert!(matches!(
+        existing_dir,
+        nar::listing::ListingEntry::Directory { .. }
+    ));
+}
diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs
index c678d26ffb38..d0e8ee8a412f 100644
--- a/tvix/nix-compat/src/nar/mod.rs
+++ b/tvix/nix-compat/src/nar/mod.rs
@@ -1,4 +1,5 @@
 pub(crate) mod wire;
 
+pub mod listing;
 pub mod reader;
 pub mod writer;
diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs
index 9e9237ead363..eef3b10f3c28 100644
--- a/tvix/nix-compat/src/nar/reader/mod.rs
+++ b/tvix/nix-compat/src/nar/reader/mod.rs
@@ -16,7 +16,7 @@ use std::marker::PhantomData;
 // Required reading for understanding this module.
 use crate::nar::wire;
 
-#[cfg(feature = "async")]
+#[cfg(all(feature = "async", feature = "wire"))]
 pub mod r#async;
 
 mod read;
@@ -29,9 +29,11 @@ struct ArchiveReader<'a, 'r> {
     inner: &'a mut Reader<'r>,
 
     /// In debug mode, also track when we need to abandon this archive reader.
+    ///
     /// The archive reader must be abandoned when:
     ///   * An error is encountered at any point
     ///   * A file or directory reader is dropped before being read entirely.
+    ///
     /// All of these checks vanish in release mode.
     status: ArchiveReaderStatus<'a>,
 }
diff --git a/tvix/nix-compat/src/nar/tests/nixos-release.ls b/tvix/nix-compat/src/nar/tests/nixos-release.ls
new file mode 100644
index 000000000000..9dd350b7cf86
--- /dev/null
+++ b/tvix/nix-compat/src/nar/tests/nixos-release.ls
@@ -0,0 +1 @@
+{"root":{"entries":{"iso":{"entries":{"nixos-minimal-new-kernel-no-zfs-24.11pre660688.bee6b69aad74-x86_64-linux.iso":{"narOffset":440,"size":1051721728,"type":"regular"}},"type":"directory"},"nix-support":{"entries":{"hydra-build-products":{"narOffset":1051722544,"size":211,"type":"regular"},"system":{"narOffset":1051722944,"size":13,"type":"regular"}},"type":"directory"}},"type":"directory"},"version":1}
\ No newline at end of file
diff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs
index 9e99b530ce15..67654129ee1d 100644
--- a/tvix/nix-compat/src/nar/wire/mod.rs
+++ b/tvix/nix-compat/src/nar/wire/mod.rs
@@ -39,7 +39,7 @@
 //! TOK_NAR ::= "nix-archive-1" "(" "type"
 //! TOK_SYM ::= "symlink" "target"
 //! TOK_REG ::= "regular" "contents"
-//! TOK_EXE ::= "regular" "executable" ""
+//! TOK_EXE ::= "regular" "executable" "" "contents"
 //! TOK_DIR ::= "directory"
 //! TOK_ENT ::= "entry" "(" "name"
 //! TOK_NOD ::= "node" "(" "type"
@@ -91,13 +91,15 @@ pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0
 pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
 pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
 #[cfg(feature = "async")]
+#[allow(dead_code)]
 const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
 
 #[cfg(feature = "async")]
+#[allow(dead_code)]
 #[derive(Debug)]
 pub(crate) enum PadPar {}
 
-#[cfg(feature = "async")]
+#[cfg(all(feature = "async", feature = "wire"))]
 impl crate::wire::reader::Tag for PadPar {
     const PATTERN: &'static [u8] = &TOK_PAD_PAR;
 
@@ -119,6 +121,8 @@ fn tokens() {
         (&TOK_ENT, &["entry", "(", "name"]),
         (&TOK_NOD, &["node", "(", "type"]),
         (&TOK_PAR, &[")"]),
+        #[cfg(feature = "async")]
+        (&TOK_PAD_PAR, &["", ")"]),
     ];
 
     for &(tok, xs) in cases {
diff --git a/tvix/nix-compat/src/nar/writer/sync.rs b/tvix/nix-compat/src/nar/writer/sync.rs
index 6270129028fa..b441479ac60b 100644
--- a/tvix/nix-compat/src/nar/writer/sync.rs
+++ b/tvix/nix-compat/src/nar/writer/sync.rs
@@ -35,11 +35,8 @@ use std::io::{
     Write,
 };
 
-/// Convenience type alias for types implementing [`Write`].
-pub type Writer<'a> = dyn Write + Send + 'a;
-
 /// Create a new NAR, writing the output to the specified writer.
-pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> {
+pub fn open<W: Write>(writer: &mut W) -> io::Result<Node<W>> {
     let mut node = Node { writer };
     node.write(&wire::TOK_NAR)?;
     Ok(node)
@@ -49,11 +46,11 @@ pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>>
 ///
 /// A NAR can be thought of as a tree of nodes represented by this type. Each
 /// node can be a file, a symlink or a directory containing other nodes.
-pub struct Node<'a, 'w: 'a> {
-    writer: &'a mut Writer<'w>,
+pub struct Node<'a, W: Write> {
+    writer: &'a mut W,
 }
 
-impl<'a, 'w> Node<'a, 'w> {
+impl<'a, W: Write> Node<'a, W> {
     fn write(&mut self, data: &[u8]) -> io::Result<()> {
         self.writer.write_all(data)
     }
@@ -123,12 +120,59 @@ impl<'a, 'w> Node<'a, 'w> {
         Ok(())
     }
 
+    /// Make this node a single file but let the user handle the writing of the file contents.
+    /// The user gets access to a writer to write the file contents to, plus a [`FileManualWrite`]
+    /// on which they must call [`FileManualWrite::close`] to finish writing the file node.
+    ///
+    /// It is the caller's responsibility to write the correct number of bytes to the writer and
+    /// invoke [`FileManualWrite::close`], or invalid archives will be produced silently.
+    ///
+    /// ```rust
+    /// # use std::io::BufReader;
+    /// # use std::io::Write;
+    /// #
+    /// # // Output location to write the NAR to.
+    /// # let mut sink: Vec<u8> = Vec::new();
+    /// #
+    /// # // Instantiate writer for this output location.
+    /// # let mut nar = nix_compat::nar::writer::open(&mut sink)?;
+    /// #
+    /// let contents = "Hello world\n".as_bytes();
+    /// let size = contents.len() as u64;
+    /// let executable = false;
+    ///
+    /// let (writer, skip) = nar
+    ///     .file_manual_write(executable, size)?;
+    ///
+    /// // Write the contents
+    /// writer.write_all(&contents)?;
+    ///
+    /// // Close the file node
+    /// skip.close(writer)?;
+    /// # Ok::<(), std::io::Error>(())
+    /// ```
+    pub fn file_manual_write(
+        mut self,
+        executable: bool,
+        size: u64,
+    ) -> io::Result<(&'a mut W, FileManualWrite)> {
+        self.write(if executable {
+            &wire::TOK_EXE
+        } else {
+            &wire::TOK_REG
+        })?;
+
+        self.write(&size.to_le_bytes())?;
+
+        Ok((self.writer, FileManualWrite { size }))
+    }
+
     /// Make this node a directory, the content of which is set using the
     /// resulting [`Directory`] value.
     ///
     /// It is the caller's responsibility to invoke [`Directory::close`],
     /// or invalid archives will be produced silently.
-    pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> {
+    pub fn directory(mut self) -> io::Result<Directory<'a, W>> {
         self.write(&wire::TOK_DIR)?;
         Ok(Directory::new(self))
     }
@@ -145,13 +189,13 @@ fn into_name(_name: &[u8]) -> Name {
 }
 
 /// Content of a NAR node that represents a directory.
-pub struct Directory<'a, 'w> {
-    node: Node<'a, 'w>,
+pub struct Directory<'a, W: Write> {
+    node: Node<'a, W>,
     prev_name: Option<Name>,
 }
 
-impl<'a, 'w> Directory<'a, 'w> {
-    fn new(node: Node<'a, 'w>) -> Self {
+impl<'a, W: Write> Directory<'a, W> {
+    fn new(node: Node<'a, W>) -> Self {
         Self {
             node,
             prev_name: None,
@@ -166,7 +210,7 @@ impl<'a, 'w> Directory<'a, 'w> {
     /// It is the caller's responsibility to ensure that directory entries are
     /// written in order of ascending name. If this is not ensured, this method
     /// may panic or silently produce invalid archives.
-    pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> {
+    pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, W>> {
         debug_assert!(
             name.len() <= wire::MAX_NAME_LEN,
             "name.len() > {}",
@@ -222,3 +266,24 @@ impl<'a, 'w> Directory<'a, 'w> {
         Ok(())
     }
 }
+
+/// Content of a NAR node that represents a file whose contents are being written out manually.
+/// Returned by [`Node::file_manual_write`].
+#[must_use]
+pub struct FileManualWrite {
+    size: u64,
+}
+
+impl FileManualWrite {
+    /// Finish writing the file structure to the NAR after having manually written the file contents.
+    ///
+    /// **Important:** This *must* be called with the writer returned by `file_manual_write` after
+    /// the file contents have been manually and fully written. Otherwise the resulting NAR file
+    /// will be invalid.
+    pub fn close<W: Write>(self, writer: &mut W) -> io::Result<()> {
+        let mut node = Node { writer };
+        node.pad(self.size)?;
+        node.write(&wire::TOK_PAR)?;
+        Ok(())
+    }
+}