From a23b7e17c04453a4d5ea2d47a88c6c6874471c08 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 31 Jan 2023 15:08:25 +0100 Subject: refactor(tvix/nix-compat): absorb nar writer Expose it at `nix_compat::nar::writer`. Change-Id: I98a8acfa6277074f24608d64fb6c0082b41d10f5 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7969 Autosubmit: flokli Reviewed-by: tazjin Tested-by: BuildkiteCI --- tvix/Cargo.lock | 4 - tvix/Cargo.toml | 1 - tvix/nar/Cargo.toml | 4 - tvix/nar/default.nix | 5 - tvix/nar/src/lib.rs | 206 --------------------------------- tvix/nar/src/wire.rs | 46 -------- tvix/nix-compat/src/lib.rs | 1 + tvix/nix-compat/src/nar/mod.rs | 1 + tvix/nix-compat/src/nar/writer/mod.rs | 206 +++++++++++++++++++++++++++++++++ tvix/nix-compat/src/nar/writer/wire.rs | 46 ++++++++ 10 files changed, 254 insertions(+), 266 deletions(-) delete mode 100644 tvix/nar/Cargo.toml delete mode 100644 tvix/nar/default.nix delete mode 100644 tvix/nar/src/lib.rs delete mode 100644 tvix/nar/src/wire.rs create mode 100644 tvix/nix-compat/src/nar/mod.rs create mode 100644 tvix/nix-compat/src/nar/writer/mod.rs create mode 100644 tvix/nix-compat/src/nar/writer/wire.rs diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 7060c77314..bc0bafc00e 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -2675,10 +2675,6 @@ dependencies = [ "tvix-eval", ] -[[package]] -name = "tvix-nar" -version = "0.0.0" - [[package]] name = "tvix-serde" version = "0.1.0" diff --git a/tvix/Cargo.toml b/tvix/Cargo.toml index 079d36218b..314e329e6f 100644 --- a/tvix/Cargo.toml +++ b/tvix/Cargo.toml @@ -21,7 +21,6 @@ members = [ "cli", "eval", "eval/builtin-macros", - "nar", "nix_cli", "nix-compat", "serde", diff --git a/tvix/nar/Cargo.toml b/tvix/nar/Cargo.toml deleted file mode 100644 index 6ec8072c3a..0000000000 --- a/tvix/nar/Cargo.toml +++ /dev/null @@ -1,4 +0,0 @@ -[package] -name = "tvix-nar" -version = "0.0.0" -edition = "2021" diff --git a/tvix/nar/default.nix b/tvix/nar/default.nix deleted file mode 100644 
index 4d62add059..0000000000 --- a/tvix/nar/default.nix +++ /dev/null @@ -1,5 +0,0 @@ -{ depot, ... }: - -depot.tvix.crates.workspaceMembers.tvix-nar.build.override { - runTests = true; -} diff --git a/tvix/nar/src/lib.rs b/tvix/nar/src/lib.rs deleted file mode 100644 index 62d2be5014..0000000000 --- a/tvix/nar/src/lib.rs +++ /dev/null @@ -1,206 +0,0 @@ -//! Implements an interface for writing the Nix archive format (NAR). -//! -//! NAR files (and their hashed representations) are used in C++ Nix for -//! addressing fixed-output derivations and a variety of other things. -//! -//! NAR files can be output to any type that implements [`Write`], and content -//! can be read from any type that implementes [`BufRead`]. -//! -//! Writing a single file might look like this: -//! -//! ```rust -//! # use std::io::BufReader; -//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; -//! -//! // Output location to write the NAR to. -//! let mut sink: Vec<u8> = Vec::new(); -//! -//! // Instantiate writer for this output location. -//! let mut nar = tvix_nar::open(&mut sink)?; -//! -//! // Acquire metadata for the single file to output, and pass it in a -//! // `BufRead`-implementing type. -//! -//! let executable = false; -//! let size = some_file.len() as u64; -//! let mut reader = BufReader::new(some_file.as_slice()); -//! nar.file(executable, size, &mut reader)?; -//! # Ok::<(), std::io::Error>(()) -//! ``` - -use std::io::{self, BufRead, ErrorKind::UnexpectedEof, Write}; - -mod wire; - -/// Convenience type alias for types implementing [`Write`]. -pub type Writer<'a> = dyn Write + 'a; - -/// Create a new NAR, writing the output to the specified writer. -pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { - let mut node = Node { writer }; - node.write(&wire::TOK_NAR)?; - Ok(node) -} - -/// Single node in a NAR file. -/// -/// A NAR can be thought of as a tree of nodes represented by this type.
Each -/// node can be a file, a symlink or a directory containing other nodes. -pub struct Node<'a, 'w: 'a> { - writer: &'a mut Writer<'w>, -} - -impl<'a, 'w> Node<'a, 'w> { - fn write(&mut self, data: &[u8]) -> io::Result<()> { - self.writer.write_all(data) - } - - fn pad(&mut self, n: u64) -> io::Result<()> { - match (n & 7) as usize { - 0 => Ok(()), - n => self.write(&[0; 8][n..]), - } - } - - /// Make this node a symlink. - pub fn symlink(mut self, target: &str) -> io::Result<()> { - debug_assert!( - target.len() <= wire::MAX_TARGET_LEN, - "target.len() > {}", - wire::MAX_TARGET_LEN - ); - debug_assert!( - !target.contains('\0'), - "invalid target characters: {target:?}" - ); - debug_assert!(!target.is_empty(), "empty target"); - - self.write(&wire::TOK_SYM)?; - self.write(&target.len().to_le_bytes())?; - self.write(target.as_bytes())?; - self.pad(target.len() as u64)?; - self.write(&wire::TOK_PAR)?; - Ok(()) - } - - /// Make this node a single file. - pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> { - self.write(if executable { - &wire::TOK_EXE - } else { - &wire::TOK_REG - })?; - - self.write(&size.to_le_bytes())?; - - let mut need = size; - while need != 0 { - let data = reader.fill_buf()?; - - if data.is_empty() { - return Err(UnexpectedEof.into()); - } - - let n = need.min(data.len() as u64) as usize; - self.write(&data[..n])?; - - need -= n as u64; - reader.consume(n); - } - - self.pad(size)?; - self.write(&wire::TOK_PAR)?; - - Ok(()) - } - - /// Make this node a directory, the content of which is set using the - /// resulting [`Directory`] value. - pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { - self.write(&wire::TOK_DIR)?; - Ok(Directory::new(self)) - } -} - -#[cfg(debug_assertions)] -type Name = String; -#[cfg(not(debug_assertions))] -type Name = (); - -fn into_name(_name: &str) -> Name { - #[cfg(debug_assertions)] - _name.to_owned() -} - -/// Content of a NAR node that represents a directory.
-pub struct Directory<'a, 'w> { - node: Node<'a, 'w>, - prev_name: Option<Name>, -} - -impl<'a, 'w> Directory<'a, 'w> { - fn new(node: Node<'a, 'w>) -> Self { - Self { - node, - prev_name: None, - } - } - - /// Add an entry to the directory. - /// - /// The entry is simply another [`Node`], which can then be filled like the - /// root of a NAR (including, of course, by nesting directories). - pub fn entry(&mut self, name: &str) -> io::Result<Node<'_, 'w>> { - debug_assert!( - name.len() <= wire::MAX_NAME_LEN, - "name.len() > {}", - wire::MAX_NAME_LEN - ); - debug_assert!(!["", ".", ".."].contains(&name), "invalid name: {name:?}"); - debug_assert!( - !name.contains(['/', '\0']), - "invalid name characters: {name:?}" - ); - - match self.prev_name { - None => { - self.prev_name = Some(into_name(name)); - } - Some(ref mut _prev_name) => { - #[cfg(debug_assertions)] - { - assert!( - &**_prev_name < name, - "misordered names: {_prev_name:?} >= {name:?}" - ); - _prev_name.clear(); - _prev_name.push_str(name); - } - self.node.write(&wire::TOK_PAR)?; - } - } - - self.node.write(&wire::TOK_ENT)?; - self.node.write(&name.len().to_le_bytes())?; - self.node.write(name.as_bytes())?; - self.node.pad(name.len() as u64)?; - self.node.write(&wire::TOK_NOD)?; - - Ok(Node { - writer: &mut *self.node.writer, - }) - } - - /// Close a directory and write terminators for the directory to the NAR. - /// - /// **Important:** This *must* be called when all entries have been written - /// in a directory, otherwise the resulting NAR file will be invalid.
- pub fn close(mut self) -> io::Result<()> { - if self.prev_name.is_some() { - self.node.write(&wire::TOK_PAR)?; - } - - self.node.write(&wire::TOK_PAR)?; - Ok(()) - } -} diff --git a/tvix/nar/src/wire.rs b/tvix/nar/src/wire.rs deleted file mode 100644 index 98581ae3aa..0000000000 --- a/tvix/nar/src/wire.rs +++ /dev/null @@ -1,46 +0,0 @@ -pub const MAX_NAME_LEN: usize = 255; -pub const MAX_TARGET_LEN: usize = 4095; - -#[cfg(test)] -fn token(xs: &[&str]) -> Vec<u8> { - let mut out = vec![]; - for x in xs { - let len = x.len() as u64; - out.extend_from_slice(&len.to_le_bytes()); - out.extend_from_slice(x.as_bytes()); - - let n = x.len() & 7; - if n != 0 { - const ZERO: [u8; 8] = [0; 8]; - out.extend_from_slice(&ZERO[n..]); - } - } - out -} - -pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; -pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; -pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; -pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; -pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; -pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; -pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; -pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; - -#[test] -fn tokens() { - let cases: &[(&[u8], &[&str])] = &[ - (&TOK_NAR, &["nix-archive-1", "(", "type"]), - (&TOK_REG, &["regular", "contents"]), - (&TOK_EXE, &["regular", "executable", "", "contents"]), - (&TOK_SYM, &["symlink", "target"]), - (&TOK_DIR, &["directory"]), - (&TOK_ENT, &["entry", "(", "name"]), - (&TOK_NOD, &["node", "(", "type"]), - (&TOK_PAR, &[")"]), - ];
- - for &(tok, xs) in cases { - assert_eq!(tok, token(xs)); - } -} diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs index 59d687ee7f..60775ad90e 100644 --- a/tvix/nix-compat/src/lib.rs +++ b/tvix/nix-compat/src/lib.rs @@ -1,3 +1,4 @@ pub mod derivation; +pub mod nar; pub mod nixbase32; pub mod store_path; diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs new file mode 100644 index 0000000000..d3baa81782 --- /dev/null +++ b/tvix/nix-compat/src/nar/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/tvix/nix-compat/src/nar/writer/mod.rs b/tvix/nix-compat/src/nar/writer/mod.rs new file mode 100644 index 0000000000..9f9640e273 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/mod.rs @@ -0,0 +1,206 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`Write`], and content +//! can be read from any type that implements [`BufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # use std::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::open(&mut sink)?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `BufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader)?; +//! # Ok::<(), std::io::Error>(()) +//! ``` + +use std::io::{self, BufRead, ErrorKind::UnexpectedEof, Write}; + +mod wire; + +/// Convenience type alias for types implementing [`Write`].
+pub type Writer<'a> = dyn Write + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR)?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data) + } + + fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]), + } + } + + /// Make this node a symlink. + pub fn symlink(mut self, target: &str) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!( + !target.contains('\0'), + "invalid target characters: {target:?}" + ); + debug_assert!(!target.is_empty(), "empty target"); + + self.write(&wire::TOK_SYM)?; + self.write(&target.len().to_le_bytes())?; + self.write(target.as_bytes())?; + self.pad(target.len() as u64)?; + self.write(&wire::TOK_PAR)?; + Ok(()) + } + + /// Make this node a single file.
+ pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + })?; + + self.write(&size.to_le_bytes())?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf()?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n])?; + + need -= n as u64; + reader.consume(n); + } + + self.pad(size)?; + self.write(&wire::TOK_PAR)?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR)?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = String; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &str) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories).
+ pub fn entry(&mut self, name: &str) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!["", ".", ".."].contains(&name), "invalid name: {name:?}"); + debug_assert!( + !name.contains(['/', '\0']), + "invalid name characters: {name:?}" + ); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + assert!( + &**_prev_name < name, + "misordered names: {_prev_name:?} >= {name:?}" + ); + _prev_name.clear(); + _prev_name.push_str(name); + } + self.node.write(&wire::TOK_PAR)?; + } + } + + self.node.write(&wire::TOK_ENT)?; + self.node.write(&name.len().to_le_bytes())?; + self.node.write(name.as_bytes())?; + self.node.pad(name.len() as u64)?; + self.node.write(&wire::TOK_NOD)?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid.
+ pub fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR)?; + } + + self.node.write(&wire::TOK_PAR)?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/wire.rs b/tvix/nix-compat/src/nar/writer/wire.rs new file mode 100644 index 0000000000..98581ae3aa --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/wire.rs @@ -0,0 +1,46 @@ +pub const MAX_NAME_LEN: usize = 255; +pub const MAX_TARGET_LEN: usize = 4095; + +#[cfg(test)] +fn token(xs: &[&str]) -> Vec<u8> { + let mut out = vec![]; + for x in xs { + let len = x.len() as u64; + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(x.as_bytes()); + + let n = x.len() & 7; + if n != 0 { + const ZERO: [u8; 8] = [0; 8]; + out.extend_from_slice(&ZERO[n..]); + } + } + out +} + +pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; +pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; +pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; +pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[test] +fn tokens() { + let cases: &[(&[u8], &[&str])] = &[ + (&TOK_NAR, &["nix-archive-1", "(", "type"]), + (&TOK_REG, &["regular", "contents"]), + (&TOK_EXE, &["regular", "executable", "", "contents"]), + (&TOK_SYM, &["symlink", "target"]), + (&TOK_DIR, &["directory"]), + (&TOK_ENT, &["entry", "(", "name"]), + (&TOK_NOD,
&["node", "(", "type"]), + (&TOK_PAR, &[")"]), + ]; + + for &(tok, xs) in cases { + assert_eq!(tok, token(xs)); + } +} -- cgit 1.4.1