diff options
Diffstat (limited to 'tvix/nix-compat/src/nar')
-rw-r--r-- | tvix/nix-compat/src/nar/listing/mod.rs | 128 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/listing/test.rs | 59 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/mod.rs | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/mod.rs | 4 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/tests/nixos-release.ls | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/wire/mod.rs | 8 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/writer/sync.rs | 91 |
7 files changed, 276 insertions, 16 deletions
diff --git a/tvix/nix-compat/src/nar/listing/mod.rs b/tvix/nix-compat/src/nar/listing/mod.rs new file mode 100644 index 000000000000..5a9a3b4d3613 --- /dev/null +++ b/tvix/nix-compat/src/nar/listing/mod.rs @@ -0,0 +1,128 @@ +//! Parser for the Nix archive listing format, aka .ls. +//! +//! LS files are produced by the C++ Nix implementation via the `write-nar-listing=1` query parameter +//! passed to a store implementation when transferring store paths. +//! +//! Listing files contain metadata about a file and its offset in the corresponding NAR. +//! +//! NOTE: LS entries do not offer any integrity field to validate the retrieved file at the provided +//! offset. Validating the contents is the caller's responsibility. + +use std::{ + collections::HashMap, + path::{Component, Path}, +}; + +use serde::Deserialize; + +#[cfg(test)] +mod test; + +#[derive(Debug, thiserror::Error)] +pub enum ListingError { + // TODO: add an enum of what component was problematic + // reusing `std::path::Component` is not possible as it contains a lifetime. + /// An unsupported path component is one of: + /// - a Windows prefix (`C:\\`, `\\share\\`) + /// - a parent directory (`..`) + /// - a root directory (`/`) + #[error("unsupported path component")] + UnsupportedPathComponent, + #[error("invalid encoding for entry component")] + InvalidEncoding, +} + +#[derive(Debug, Deserialize)] +#[serde(tag = "type", rename_all = "lowercase")] +pub enum ListingEntry { + Regular { + size: u64, + #[serde(default)] + executable: bool, + #[serde(rename = "narOffset")] + nar_offset: u64, + }, + Directory { + // It's tempting to think that the key should be a `Vec<u8>` + // but Nix does not support that and will fail to emit a version 1 listing for any string + // that cannot be encoded as UTF-8. 
+ entries: HashMap<String, ListingEntry>, + }, + Symlink { + target: String, + }, +} + +impl ListingEntry { + /// Given a relative path without any `..` component, this will locate, relative to this entry, a + /// deeper entry. + /// + /// If the path is invalid, a listing error [`ListingError`] will be returned. + /// If the entry cannot be found, `None` will be returned. + pub fn locate<P: AsRef<Path>>(&self, path: P) -> Result<Option<&ListingEntry>, ListingError> { + // We perform a simple DFS on the components of the path + // while rejecting dangerous components, e.g. `..` or `/` + // Files and symlinks are *leaves*, i.e. we return them + let mut cur = self; + for component in path.as_ref().components() { + match component { + Component::CurDir => continue, + Component::RootDir | Component::Prefix(_) | Component::ParentDir => { + return Err(ListingError::UnsupportedPathComponent) + } + Component::Normal(file_or_dir_name) => { + if let Self::Directory { entries } = cur { + // As Nix cannot encode non-UTF8 components in the listing (see comment on + // the `Directory` enum variant), path components with invalid encodings are + // errors. + let entry_name = file_or_dir_name + .to_str() + .ok_or(ListingError::InvalidEncoding)?; + + if let Some(new_entry) = entries.get(entry_name) { + cur = new_entry; + } else { + return Ok(None); + } + } else { + return Ok(None); + } + } + } + } + + // By construction, we found the node that corresponds to the path traversal. 
+ Ok(Some(cur)) + } +} + +#[derive(Debug)] +pub struct ListingVersion<const V: u8>; + +#[derive(Debug, thiserror::Error)] +#[error("Invalid version: {0}")] +struct ListingVersionError(u8); + +impl<'de, const V: u8> Deserialize<'de> for ListingVersion<V> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let value = u8::deserialize(deserializer)?; + if value == V { + Ok(ListingVersion::<V>) + } else { + Err(serde::de::Error::custom(ListingVersionError(value))) + } + } +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +#[non_exhaustive] +pub enum Listing { + V1 { + root: ListingEntry, + version: ListingVersion<1>, + }, +} diff --git a/tvix/nix-compat/src/nar/listing/test.rs b/tvix/nix-compat/src/nar/listing/test.rs new file mode 100644 index 000000000000..5b2ac3f166fe --- /dev/null +++ b/tvix/nix-compat/src/nar/listing/test.rs @@ -0,0 +1,59 @@ +use std::{collections::HashMap, path::PathBuf, str::FromStr}; + +use crate::nar; + +#[test] +fn weird_paths() { + let root = nar::listing::ListingEntry::Directory { + entries: HashMap::new(), + }; + + root.locate("../../../../etc/passwd") + .expect_err("Failed to reject `../` fragment in a path during traversal"); + + // Gated on Windows as C:\\ is parsed as `Component::Normal(_)` on Linux. + #[cfg(target_os = "windows")] + root.locate("C:\\\\Windows\\System32") + .expect_err("Failed to reject Windows-style prefixes"); + + root.locate("/etc/passwd") + .expect_err("Failed to reject absolute UNIX paths"); +} + +#[test] +fn nixos_release() { + let listing_bytes = include_bytes!("../tests/nixos-release.ls"); + let listing: nar::listing::Listing = serde_json::from_slice(listing_bytes).unwrap(); + + let nar::listing::Listing::V1 { root, .. } = listing; + assert!(matches!(root, nar::listing::ListingEntry::Directory { .. 
})); + + let build_products = root + .locate(PathBuf::from_str("nix-support/hydra-build-products").unwrap()) + .expect("Failed to locate a known file in a directory") + .expect("File was unexpectedly not found in the listing"); + + assert!(matches!( + build_products, + nar::listing::ListingEntry::Regular { .. } + )); + + let nonexisting_file = root + .locate(PathBuf::from_str("nix-support/does-not-exist").unwrap()) + .expect("Failed to locate an unknown file in a directory"); + + assert!( + nonexisting_file.is_none(), + "Non-existing file was unexpectedly found in the listing" + ); + + let existing_dir = root + .locate(PathBuf::from_str("nix-support").unwrap()) + .expect("Failed to locate a known directory in a directory") + .expect("Directory was expectedly found in the listing"); + + assert!(matches!( + existing_dir, + nar::listing::ListingEntry::Directory { .. } + )); +} diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs index c678d26ffb38..d0e8ee8a412f 100644 --- a/tvix/nix-compat/src/nar/mod.rs +++ b/tvix/nix-compat/src/nar/mod.rs @@ -1,4 +1,5 @@ pub(crate) mod wire; +pub mod listing; pub mod reader; pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index 9e9237ead363..eef3b10f3c28 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -16,7 +16,7 @@ use std::marker::PhantomData; // Required reading for understanding this module. use crate::nar::wire; -#[cfg(feature = "async")] +#[cfg(all(feature = "async", feature = "wire"))] pub mod r#async; mod read; @@ -29,9 +29,11 @@ struct ArchiveReader<'a, 'r> { inner: &'a mut Reader<'r>, /// In debug mode, also track when we need to abandon this archive reader. + /// /// The archive reader must be abandoned when: /// * An error is encountered at any point /// * A file or directory reader is dropped before being read entirely. + /// /// All of these checks vanish in release mode. 
status: ArchiveReaderStatus<'a>, } diff --git a/tvix/nix-compat/src/nar/tests/nixos-release.ls b/tvix/nix-compat/src/nar/tests/nixos-release.ls new file mode 100644 index 000000000000..9dd350b7cf86 --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/nixos-release.ls @@ -0,0 +1 @@ +{"root":{"entries":{"iso":{"entries":{"nixos-minimal-new-kernel-no-zfs-24.11pre660688.bee6b69aad74-x86_64-linux.iso":{"narOffset":440,"size":1051721728,"type":"regular"}},"type":"directory"},"nix-support":{"entries":{"hydra-build-products":{"narOffset":1051722544,"size":211,"type":"regular"},"system":{"narOffset":1051722944,"size":13,"type":"regular"}},"type":"directory"}},"type":"directory"},"version":1} \ No newline at end of file diff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs index 9e99b530ce15..67654129ee1d 100644 --- a/tvix/nix-compat/src/nar/wire/mod.rs +++ b/tvix/nix-compat/src/nar/wire/mod.rs @@ -39,7 +39,7 @@ //! TOK_NAR ::= "nix-archive-1" "(" "type" //! TOK_SYM ::= "symlink" "target" //! TOK_REG ::= "regular" "contents" -//! TOK_EXE ::= "regular" "executable" "" +//! TOK_EXE ::= "regular" "executable" "" "contents" //! TOK_DIR ::= "directory" //! TOK_ENT ::= "entry" "(" "name" //! 
TOK_NOD ::= "node" "(" "type" @@ -91,13 +91,15 @@ pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0 pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; #[cfg(feature = "async")] +#[allow(dead_code)] const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; #[cfg(feature = "async")] +#[allow(dead_code)] #[derive(Debug)] pub(crate) enum PadPar {} -#[cfg(feature = "async")] +#[cfg(all(feature = "async", feature = "wire"))] impl crate::wire::reader::Tag for PadPar { const PATTERN: &'static [u8] = &TOK_PAD_PAR; @@ -119,6 +121,8 @@ fn tokens() { (&TOK_ENT, &["entry", "(", "name"]), (&TOK_NOD, &["node", "(", "type"]), (&TOK_PAR, &[")"]), + #[cfg(feature = "async")] + (&TOK_PAD_PAR, &["", ")"]), ]; for &(tok, xs) in cases { diff --git a/tvix/nix-compat/src/nar/writer/sync.rs b/tvix/nix-compat/src/nar/writer/sync.rs index 6270129028fa..b441479ac60b 100644 --- a/tvix/nix-compat/src/nar/writer/sync.rs +++ b/tvix/nix-compat/src/nar/writer/sync.rs @@ -35,11 +35,8 @@ use std::io::{ Write, }; -/// Convenience type alias for types implementing [`Write`]. -pub type Writer<'a> = dyn Write + Send + 'a; - /// Create a new NAR, writing the output to the specified writer. -pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { +pub fn open<W: Write>(writer: &mut W) -> io::Result<Node<W>> { let mut node = Node { writer }; node.write(&wire::TOK_NAR)?; Ok(node) @@ -49,11 +46,11 @@ pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> /// /// A NAR can be thought of as a tree of nodes represented by this type. Each /// node can be a file, a symlink or a directory containing other nodes. 
-pub struct Node<'a, 'w: 'a> { - writer: &'a mut Writer<'w>, +pub struct Node<'a, W: Write> { + writer: &'a mut W, } -impl<'a, 'w> Node<'a, 'w> { +impl<'a, W: Write> Node<'a, W> { fn write(&mut self, data: &[u8]) -> io::Result<()> { self.writer.write_all(data) } @@ -123,12 +120,59 @@ impl<'a, 'w> Node<'a, 'w> { Ok(()) } + /// Make this node a single file but let the user handle the writing of the file contents. + /// The user gets access to a writer to write the file contents to, plus a struct they must + /// invoke a function on to finish writing the NAR file. + /// + /// It is the caller's responsibility to write the correct number of bytes to the writer and + /// invoke [`FileManualWrite::close`], or invalid archives will be produced silently. + /// + /// ```rust + /// # use std::io::BufReader; + /// # use std::io::Write; + /// # + /// # // Output location to write the NAR to. + /// # let mut sink: Vec<u8> = Vec::new(); + /// # + /// # // Instantiate writer for this output location. + /// # let mut nar = nix_compat::nar::writer::open(&mut sink)?; + /// # + /// let contents = "Hello world\n".as_bytes(); + /// let size = contents.len() as u64; + /// let executable = false; + /// + /// let (writer, skip) = nar + /// .file_manual_write(executable, size)?; + /// + /// // Write the contents + /// writer.write_all(&contents)?; + /// + /// // Close the file node + /// skip.close(writer)?; + /// # Ok::<(), std::io::Error>(()) + /// ``` + pub fn file_manual_write( + mut self, + executable: bool, + size: u64, + ) -> io::Result<(&'a mut W, FileManualWrite)> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + })?; + + self.write(&size.to_le_bytes())?; + + Ok((self.writer, FileManualWrite { size })) + } + /// Make this node a directory, the content of which is set using the /// resulting [`Directory`] value. /// /// It is the caller's responsibility to invoke [`Directory::close`], /// or invalid archives will be produced silently. 
- pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + pub fn directory(mut self) -> io::Result<Directory<'a, W>> { self.write(&wire::TOK_DIR)?; Ok(Directory::new(self)) } @@ -145,13 +189,13 @@ fn into_name(_name: &[u8]) -> Name { } /// Content of a NAR node that represents a directory. -pub struct Directory<'a, 'w> { - node: Node<'a, 'w>, +pub struct Directory<'a, W: Write> { + node: Node<'a, W>, prev_name: Option<Name>, } -impl<'a, 'w> Directory<'a, 'w> { - fn new(node: Node<'a, 'w>) -> Self { +impl<'a, W: Write> Directory<'a, W> { + fn new(node: Node<'a, W>) -> Self { Self { node, prev_name: None, @@ -166,7 +210,7 @@ impl<'a, 'w> Directory<'a, 'w> { /// It is the caller's responsibility to ensure that directory entries are /// written in order of ascending name. If this is not ensured, this method /// may panic or silently produce invalid archives. - pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, W>> { debug_assert!( name.len() <= wire::MAX_NAME_LEN, "name.len() > {}", @@ -222,3 +266,24 @@ impl<'a, 'w> Directory<'a, 'w> { Ok(()) } } + +/// Content of a NAR node that represents a file whose contents are being written out manually. +/// Returned by the `file_manual_write` function. +#[must_use] +pub struct FileManualWrite { + size: u64, +} + +impl FileManualWrite { + /// Finish writing the file structure to the NAR after having manually written the file contents. + /// + /// **Important:** This *must* be called with the writer returned by file_manual_write after + /// the file contents have been manually and fully written. Otherwise the resulting NAR file + /// will be invalid. + pub fn close<W: Write>(self, writer: &mut W) -> io::Result<()> { + let mut node = Node { writer }; + node.pad(self.size)?; + node.write(&wire::TOK_PAR)?; + Ok(()) + } +} |