diff options
Diffstat (limited to 'tvix/nix-compat/src/nar')
-rw-r--r-- | tvix/nix-compat/src/nar/mod.rs | 4 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/mod.rs | 173 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/read.rs | 69 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/test.rs | 310 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/mod.rs | 477 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/read.rs | 141 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/test.rs | 278 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/tests/complicated.nar | bin | 0 -> 840 bytes | |||
-rw-r--r-- | tvix/nix-compat/src/nar/tests/helloworld.nar | bin | 0 -> 128 bytes | |||
-rw-r--r-- | tvix/nix-compat/src/nar/tests/symlink.nar | bin | 0 -> 136 bytes | |||
-rw-r--r-- | tvix/nix-compat/src/nar/wire/mod.rs | 150 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/wire/tag.rs | 166 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/writer/async.rs | 235 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/writer/mod.rs | 9 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/writer/sync.rs | 224 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/writer/test.rs | 128 |
16 files changed, 2364 insertions, 0 deletions
diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs new file mode 100644 index 0000000000..c678d26ffb --- /dev/null +++ b/tvix/nix-compat/src/nar/mod.rs @@ -0,0 +1,4 @@ +pub(crate) mod wire; + +pub mod reader; +pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs new file mode 100644 index 0000000000..0808fba38c --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -0,0 +1,173 @@ +use std::{ + mem::MaybeUninit, + pin::Pin, + task::{self, Poll}, +}; + +use tokio::io::{self, AsyncBufRead, AsyncRead, ErrorKind::InvalidData}; + +// Required reading for understanding this module. +use crate::{ + nar::{self, wire::PadPar}, + wire::{self, BytesReader}, +}; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn AsyncBufRead + Unpin + Send + 'a; + +/// Start reading a NAR file from `reader`. +pub async fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &nar::wire::TOK_NAR).await?; + Node::new(reader).await +} + +pub enum Node<'a, 'r: 'a> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r: 'a> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [BytesReader]. + async fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> { + Ok(match read::tag(reader).await? { + nar::wire::Node::Sym => { + let target = wire::read_bytes(reader, 1..=nar::wire::MAX_TARGET_LEN).await?; + + if target.contains(&0) { + return Err(InvalidData.into()); + } + + read::token(reader, &nar::wire::TOK_PAR).await?; + + Node::Symlink { target } + } + tag @ (nar::wire::Node::Reg | nar::wire::Node::Exe) => Node::File { + executable: tag == nar::wire::Node::Exe, + reader: FileReader { + inner: BytesReader::new_internal(reader, ..).await?, + }, + }, + nar::wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [AsyncRead] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +pub struct FileReader<'a, 'r> { + inner: BytesReader<&'a mut Reader<'r>, PadPar>, +} + +impl<'a, 'r> FileReader<'a, 'r> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> u64 { + self.inner.len() + } +} + +impl<'a, 'r> AsyncRead for FileReader<'a, 'r> { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut task::Context, + buf: &mut io::ReadBuf, + ) -> Poll<io::Result<()>> { + Pin::new(&mut self.get_mut().inner).poll_read(cx, buf) + } +} + +impl<'a, 'r> AsyncBufRead for FileReader<'a, 'r> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + Pin::new(&mut self.get_mut().inner).poll_fill_buf(cx) + } + + fn consume(self: Pin<&mut Self>, amt: usize) { + Pin::new(&mut self.get_mut().inner).consume(amt) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: &'a mut Reader<'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: &'a mut Reader<'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + read::token(self.reader, &nar::wire::TOK_PAR).await?; + } + + if let nar::wire::Entry::None = read::tag(self.reader).await? { + return Ok(None); + } + + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; + + if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + read::token(self.reader, &nar::wire::TOK_NOD).await?; + + Ok(Some(Entry { + name: &self.prev_name, + node: Node::new(self.reader).await?, + })) + } +} diff --git a/tvix/nix-compat/src/nar/reader/async/read.rs b/tvix/nix-compat/src/nar/reader/async/read.rs new file mode 100644 index 0000000000..2adf894922 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/read.rs @@ -0,0 +1,69 @@ +use tokio::io::{ + self, AsyncReadExt, + ErrorKind::{InvalidData, UnexpectedEof}, +}; + +use crate::nar::wire::Tag; + +use super::Reader; + +/// Consume a known token from the reader. +pub async fn token<const N: usize>(reader: &mut Reader<'_>, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [AsyncReadExt::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf).await? { + 0 => { + return Err(UnexpectedEof.into()); + } + n => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. +pub async fn tag<T: Tag>(reader: &mut Reader<'_>) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN]).await?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest]).await?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs new file mode 100644 index 0000000000..7bc1f8942f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -0,0 +1,310 @@ +use tokio::io::AsyncReadExt; + +mod nar { + pub use crate::nar::reader::r#async as reader; +} + +#[tokio::test] +async fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/symlink.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader + .read_to_end(&mut buf) + .await + .expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry).await; + } + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. + dir_reader.next().await.unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).await.unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs new file mode 100644 index 0000000000..7e9143c8f3 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -0,0 +1,477 @@ +//! Parser for the Nix archive format, aka NAR. +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! a variety of things, including addressing fixed-output derivations +//! and transferring store paths between Nix stores. + +use std::io::{ + self, BufRead, + ErrorKind::{InvalidData, UnexpectedEof}, + Read, Write, +}; + +#[cfg(not(debug_assertions))] +use std::marker::PhantomData; + +// Required reading for understanding this module. +use crate::nar::wire; + +#[cfg(all(feature = "async", feature = "wire"))] +pub mod r#async; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn BufRead + Send + 'a; + +struct ArchiveReader<'a, 'r> { + inner: &'a mut Reader<'r>, + + /// In debug mode, also track when we need to abandon this archive reader. + /// The archive reader must be abandoned when: + /// * An error is encountered at any point + /// * A file or directory reader is dropped before being read entirely. + /// All of these checks vanish in release mode. + status: ArchiveReaderStatus<'a>, +} + +macro_rules! try_or_poison { + ($it:expr, $ex:expr) => { + match $ex { + Ok(x) => x, + Err(e) => { + $it.status.poison(); + return Err(e.into()); + } + } + }; +} +/// Start reading a NAR file from `reader`. +pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &wire::TOK_NAR)?; + Node::new(ArchiveReader { + inner: reader, + status: ArchiveReaderStatus::top(), + }) +} + +pub enum Node<'a, 'r> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [FileReader]. + fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> { + Ok(match read::tag(reader.inner)? { + wire::Node::Sym => { + let target = + try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN)); + + if target.is_empty() || target.contains(&0) { + reader.status.poison(); + return Err(InvalidData.into()); + } + + try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR)); + reader.status.ready_parent(); // Immediately allow reading from parent again + + Node::Symlink { target } + } + tag @ (wire::Node::Reg | wire::Node::Exe) => { + let len = try_or_poison!(&mut reader, read::u64(reader.inner)); + + Node::File { + executable: tag == wire::Node::Exe, + reader: FileReader::new(reader, len)?, + } + } + wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [Read] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +pub struct FileReader<'a, 'r> { + reader: ArchiveReader<'a, 'r>, + len: u64, + /// Truncated original file length for padding computation. + /// We only care about the 3 least significant bits; semantically, this is a u3. + pad: u8, +} + +impl<'a, 'r> FileReader<'a, 'r> { + /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. + /// We handle the terminating [wire::TOK_PAR] on semantic EOF. + fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> { + // For zero-length files, we have to read the terminating TOK_PAR + // immediately, since FileReader::read may never be called; we've + // already reached semantic EOF by definition. + if len == 0 { + read::token(reader.inner, &wire::TOK_PAR)?; + reader.status.ready_parent(); + } + + Ok(Self { + reader, + len, + pad: len as u8, + }) + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn len(&self) -> u64 { + self.len + } +} + +impl FileReader<'_, '_> { + /// Equivalent to [BufRead::fill_buf] + /// + /// We can't directly implement [BufRead], because [FileReader::consume] needs + /// to perform fallible I/O. + pub fn fill_buf(&mut self) -> io::Result<&[u8]> { + if self.is_empty() { + return Ok(&[]); + } + + self.reader.check_correct(); + + let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf()); + + if buf.is_empty() { + self.reader.status.poison(); + return Err(UnexpectedEof.into()); + } + + if buf.len() as u64 > self.len { + buf = &buf[..self.len as usize]; + } + + Ok(buf) + } + + /// Analogous to [BufRead::consume], differing only in that it needs + /// to perform I/O in order to read padding and terminators. + pub fn consume(&mut self, n: usize) -> io::Result<()> { + if n == 0 { + return Ok(()); + } + + self.reader.check_correct(); + + self.len = self + .len + .checked_sub(n as u64) + .expect("consumed bytes past EOF"); + + self.reader.inner.consume(n); + + if self.is_empty() { + self.finish()?; + } + + Ok(()) + } + + /// Copy the (remaining) contents of the file into `dst`. + pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> { + while !self.is_empty() { + let buf = self.fill_buf()?; + let n = try_or_poison!(self.reader, dst.write(buf)); + self.consume(n)?; + } + + Ok(()) + } +} + +impl Read for FileReader<'_, '_> { + fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> { + if buf.is_empty() || self.is_empty() { + return Ok(0); + } + + self.reader.check_correct(); + + if buf.len() as u64 > self.len { + buf = &mut buf[..self.len as usize]; + } + + let n = try_or_poison!(self.reader, self.reader.inner.read(buf)); + self.len -= n as u64; + + if n == 0 { + self.reader.status.poison(); + return Err(UnexpectedEof.into()); + } + + if self.is_empty() { + self.finish()?; + } + + Ok(n) + } +} + +impl FileReader<'_, '_> { + /// We've reached semantic EOF, consume and verify the padding and terminating TOK_PAR. + /// Files are padded to 64 bits (8 bytes), just like any other byte string in the wire format. + fn finish(&mut self) -> io::Result<()> { + let pad = (self.pad & 7) as usize; + + if pad != 0 { + let mut buf = [0; 8]; + try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..])); + + if buf != [0; 8] { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + } + + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); + + // Done with reading this file, allow going back up the chain of readers + self.reader.status.ready_parent(); + + Ok(()) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: ArchiveReader<'a, 'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: ArchiveReader<'a, 'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + self.reader.check_correct(); + + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); + } + + // Determine if there are more entries to follow + if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) { + // We've reached the end of this directory. + self.reader.status.ready_parent(); + return Ok(None); + } + + let mut name = [0; wire::MAX_NAME_LEN + 1]; + let name = try_or_poison!( + self.reader, + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) + ); + + if name.is_empty() + || name.contains(&0) + || name.contains(&b'/') + || name == b"." + || name == b".." + { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] >= name { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); + + Ok(Some(Entry { + name: &self.prev_name, + // Don't need to worry about poisoning here: Node::new will do it for us if needed + node: Node::new(self.reader.child())?, + })) + } +} + +/// We use a stack of statuses to: +/// * Share poisoned state across all objects from the same underlying reader, +/// so we can check they are abandoned when an error occurs +/// * Make sure only the most recently created object is read from, and is fully exhausted +/// before anything it was created from is used again. +enum ArchiveReaderStatus<'a> { + #[cfg(not(debug_assertions))] + None(PhantomData<&'a ()>), + #[cfg(debug_assertions)] + StackTop { poisoned: bool, ready: bool }, + #[cfg(debug_assertions)] + StackChild { + poisoned: &'a mut bool, + parent_ready: &'a mut bool, + ready: bool, + }, +} + +impl ArchiveReaderStatus<'_> { + fn top() -> Self { + #[cfg(debug_assertions)] + { + ArchiveReaderStatus::StackTop { + poisoned: false, + ready: true, + } + } + + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(PhantomData) + } + + /// Poison all the objects sharing the same reader, to be used when an error occurs + fn poison(&mut self) { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true, + } + } + + /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again. + fn ready_parent(&mut self) { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => { + *ready = false; + } + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { + ready, + parent_ready, + .. + } => { + *ready = false; + **parent_ready = true; + } + }; + } + + fn poisoned(&self) -> bool { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => false, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned, .. } => *poisoned, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned, .. } => **poisoned, + } + } + + fn ready(&self) -> bool { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => *ready, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, .. } => *ready, + } + } +} + +impl<'a, 'r> ArchiveReader<'a, 'r> { + /// Create a new child reader from this one. + /// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent` + fn child(&mut self) -> ArchiveReader<'_, 'r> { + ArchiveReader { + inner: self.inner, + #[cfg(not(debug_assertions))] + status: ArchiveReaderStatus::None(PhantomData), + #[cfg(debug_assertions)] + status: match &mut self.status { + ArchiveReaderStatus::StackTop { poisoned, ready } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + ArchiveReaderStatus::StackChild { + poisoned, ready, .. + } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + }, + } + } + + /// Check the reader is in the correct status. + /// Only does anything when debug assertions are on. + #[inline(always)] + fn check_correct(&self) { + assert!( + !self.status.poisoned(), + "Archive reader used after it was meant to be abandoned!" + ); + assert!( + self.status.ready(), + "Non-ready archive reader used! (Should've been reading from something else)" + ); + } +} diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs new file mode 100644 index 0000000000..9938581f2a --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -0,0 +1,141 @@ +//! Helpers for reading [crate::nar::wire] format. + +use std::io::{ + self, + ErrorKind::{Interrupted, InvalidData, UnexpectedEof}, +}; + +use super::Reader; +use crate::nar::wire::Tag; + +/// Consume a little-endian [prim@u64] from the reader. +pub fn u64(reader: &mut Reader) -> io::Result<u64> { + let mut buf = [0; 8]; + reader.read_exact(&mut buf)?; + Ok(u64::from_le_bytes(buf)) +} + +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + +/// Consume a byte string of up to `max_len` bytes from the reader. +pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { + assert!(max_len <= isize::MAX as usize); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + let mut buf = vec![0; buf_len]; + reader.read_exact(&mut buf)?; + + // verify that the padding is all zeroes + for b in buf.drain(len..) { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(buf) +} + +/// Consume a known token from the reader. +pub fn token<const N: usize>(reader: &mut Reader, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [Read::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf) { + Ok(0) => { + return Err(UnexpectedEof.into()); + } + Ok(n) => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + Err(e) => { + if e.kind() != Interrupted { + return Err(e); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. +pub fn tag<T: Tag>(reader: &mut Reader) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN])?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest])?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs new file mode 100644 index 0000000000..63e4fb289f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -0,0 +1,278 @@ +use std::io::Read; + +use crate::nar; + +#[test] +fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/symlink.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader.read_to_end(&mut buf).expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry); + } + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. + dir_reader.next().unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/tests/complicated.nar b/tvix/nix-compat/src/nar/tests/complicated.nar new file mode 100644 index 0000000000..6a137f5fbb --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/complicated.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/helloworld.nar b/tvix/nix-compat/src/nar/tests/helloworld.nar new file mode 100644 index 0000000000..2e12681152 --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/helloworld.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/symlink.nar b/tvix/nix-compat/src/nar/tests/symlink.nar new file mode 100644 index 0000000000..7990e4ad5b --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/symlink.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs new file mode 100644 index 0000000000..26da04e67c --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/mod.rs @@ -0,0 +1,150 @@ +//! NAR wire format, without I/O details, since those differ between +//! the synchronous and asynchronous implementations. +//! +//! The wire format is an S-expression format, encoded onto the wire +//! using simple encoding rules. +//! +//! # Encoding +//! +//! Lengths are represented as 64-bit unsigned integers in little-endian +//! format. Byte strings, including file contents and syntactic strings +//! part of the grammar, are prefixed by their 64-bit length, and padded +//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string +//! is therefore encoded as eight zero bytes representing its length. +//! +//! # Grammar +//! +//! The NAR grammar is as follows: +//! ```plain +//! archive ::= "nix-archive-1" node +//! +//! node ::= "(" "type" "symlink" "target" string ")" +//! ||= "(" "type" "regular" ("executable" "")? "contents" string ")" +//! ||= "(" "type" "directory" entry* ")" +//! +//! entry ::= "entry" "(" "name" string "node" node ")" +//! ``` +//! +//! We rewrite it to pull together the purely syntactic elements into +//! unified tokens, producing an equivalent grammar that can be parsed +//! and serialized more elegantly: +//! ```plain +//! archive ::= TOK_NAR node +//! node ::= TOK_SYM string TOK_PAR +//! ||= (TOK_REG | TOK_EXE) string TOK_PAR +//! ||= TOK_DIR entry* TOK_PAR +//! +//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR +//! +//! TOK_NAR ::= "nix-archive-1" "(" "type" +//! TOK_SYM ::= "symlink" "target" +//! TOK_REG ::= "regular" "contents" +//! TOK_EXE ::= "regular" "executable" "" +//! TOK_DIR ::= "directory" +//! TOK_ENT ::= "entry" "(" "name" +//! TOK_NOD ::= "node" "(" "type" +//! TOK_PAR ::= ")" +//! ``` +//! +//! # Restrictions +//! +//! NOTE: These restrictions are not (and cannot be) enforced by this module, +//! but must be enforced by its consumers, [super::reader] and [super::writer]. +//! +//! Directory entry names cannot have the reserved names `.` and `..`, nor contain +//! forward slashes. They must appear in strictly ascending lexicographic order +//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length. +//! +//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length. +//! +//! Neither is permitted to be empty, or contain null bytes. + +// These values are the standard Linux length limits +/// Maximum length of a directory entry name +pub const MAX_NAME_LEN: usize = 255; +/// Maximum length of a symlink target +pub const MAX_TARGET_LEN: usize = 4095; + +#[cfg(test)] +fn token(xs: &[&str]) -> Vec<u8> { + let mut out = vec![]; + for x in xs { + let len = x.len() as u64; + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(x.as_bytes()); + + let n = x.len() & 7; + if n != 0 { + const ZERO: [u8; 8] = [0; 8]; + out.extend_from_slice(&ZERO[n..]); + } + } + out +} + +pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; +pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; +pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; +pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; +#[cfg(feature = "async")] +const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[cfg(feature = "async")] +#[derive(Debug)] +pub(crate) enum PadPar {} + +#[cfg(all(feature = "async", feature = "wire"))] +impl crate::wire::reader::Tag for PadPar { + const PATTERN: &'static [u8] = &TOK_PAD_PAR; + + type Buf = [u8; 24]; + + fn make_buf() -> Self::Buf { + [0; 24] + } +} + +#[test] +fn tokens() { + let cases: &[(&[u8], &[&str])] = &[ + (&TOK_NAR, &["nix-archive-1", "(", "type"]), + (&TOK_SYM, &["symlink", "target"]), + (&TOK_REG, &["regular", "contents"]), + (&TOK_EXE, &["regular", "executable", "", "contents"]), + (&TOK_DIR, &["directory"]), + (&TOK_ENT, &["entry", "(", "name"]), + (&TOK_NOD, &["node", "(", "type"]), + (&TOK_PAR, &[")"]), + ]; + + for &(tok, xs) in cases { + assert_eq!(tok, token(xs)); + } +} + +pub use tag::Tag; +mod tag; + +tag::make! { + /// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD], + /// and preceding the next variable-length element. + pub enum Node[16] { + Sym = TOK_SYM, + Reg = TOK_REG, + Exe = TOK_EXE, + Dir = TOK_DIR, + } + + /// Directory entry or terminator + pub enum Entry[0] { + /// End of directory + None = TOK_PAR, + /// Directory entry + /// Followed by a name string, [TOK_NOD], and a [Node]. + Some = TOK_ENT, + } +} diff --git a/tvix/nix-compat/src/nar/wire/tag.rs b/tvix/nix-compat/src/nar/wire/tag.rs new file mode 100644 index 0000000000..4982a0d707 --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/tag.rs @@ -0,0 +1,166 @@ +/// A type implementing Tag represents a static hash set of byte strings, +/// with a very simple perfect hash function: every element has a unique +/// discriminant at a common byte offset. The values of the type represent +/// the members by this single discriminant byte; they are indices into the +/// hash set. +pub trait Tag: Sized { + /// Discriminant offset + const OFF: usize; + /// Minimum variant length + const MIN: usize; + + /// Minimal suitably sized buffer for reading the wire representation + /// + /// HACK: This is a workaround for const generics limitations. + type Buf: AsMut<[u8]> + Send; + + /// Make an instance of [Self::Buf] + fn make_buf() -> Self::Buf; + + /// Convert a discriminant into the corresponding variant + fn from_u8(x: u8) -> Option<Self>; + + /// Convert a variant back into the wire representation + fn as_bytes(&self) -> &'static [u8]; +} + +/// Generate an enum implementing [Tag], enforcing at compile time that +/// the discriminant values are distinct. +macro_rules! make { + ( + $( + $(#[doc = $doc:expr])* + $vis:vis enum $Enum:ident[$off:expr] { + $( + $(#[doc = $var_doc:expr])* + $Var:ident = $TOK:ident, + )+ + } + )* + ) => { + $( + $(#[doc = $doc])* + #[derive(Debug, PartialEq, Eq)] + #[repr(u8)] + $vis enum $Enum { + $( + $(#[doc = $var_doc])* + $Var = $TOK[$Enum::OFF] + ),+ + } + + impl Tag for $Enum { + /// Discriminant offset + const OFF: usize = $off; + /// Minimum variant length + const MIN: usize = tag::min_of(&[$($TOK.len()),+]); + + /// Minimal suitably sized buffer for reading the wire representation + type Buf = [u8; tag::buf_of(&[$($TOK.len()),+])]; + + /// Make an instance of [Self::Buf] + #[inline(always)] + fn make_buf() -> Self::Buf { + [0u8; tag::buf_of(&[$($TOK.len()),+])] + } + + /// Convert a discriminant into the corresponding variant + #[inline(always)] + fn from_u8(x: u8) -> Option<Self> { + #[allow(non_upper_case_globals)] + mod __variant { + $( + pub const $Var: u8 = super::$Enum::$Var as u8; + )+ + } + + match x { + $(__variant::$Var => Some(Self::$Var),)+ + _ => None + } + } + + /// Convert a variant back into the wire representation + #[inline(always)] + fn as_bytes(&self) -> &'static [u8] { + match self { + $(Self::$Var => &$TOK,)+ + } + } + } + )* + }; +} + +// The following functions are written somewhat unusually, +// since they're const functions that cannot use iterators. + +/// Maximum element of a slice +const fn max_of(mut xs: &[usize]) -> usize { + let mut y = usize::MIN; + while let &[x, ref tail @ ..] = xs { + y = if x > y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum element of a slice +pub const fn min_of(mut xs: &[usize]) -> usize { + let mut y = usize::MAX; + while let &[x, ref tail @ ..] = xs { + y = if x < y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum buffer size to contain either of `0..Tag::MIN` and `Tag::MIN..` +/// at a particular time, for all possible tag wire representations, given +/// the sizes of all wire representations. +/// +/// # Example +/// +/// ```plain +/// OFF = 16 +/// MIN = 24 +/// MAX = 64 +/// +/// BUF = max(MIN, MAX-MIN) +/// = max(24, 64-24) +/// = max(24, 40) +/// = 40 +/// ``` +pub const fn buf_of(xs: &[usize]) -> usize { + max_of(&[min_of(xs), max_of(xs) - min_of(xs)]) +} + +pub(crate) use make; + +#[cfg(test)] +mod test { + use super::super::tag::{self, Tag}; + + const TOK_A: [u8; 3] = [0xed, 0xef, 0x1c]; + const TOK_B: [u8; 3] = [0xed, 0xf0, 0x1c]; + + const OFFSET: usize = 1; + + make! { + enum Token[OFFSET] { + A = TOK_A, + B = TOK_B, + } + } + + #[test] + fn example() { + assert_eq!(Token::from_u8(0xed), None); + + let tag = Token::from_u8(0xef).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_A[..]); + + let tag = Token::from_u8(0xf0).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_B[..]); + } +} diff --git a/tvix/nix-compat/src/nar/writer/async.rs b/tvix/nix-compat/src/nar/writer/async.rs new file mode 100644 index 0000000000..a2ce68fc3c --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/async.rs @@ -0,0 +1,235 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`AsyncWrite`], and content +//! can be read from any type that implementes [`AsyncBufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # futures::executor::block_on(async { +//! # use tokio::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::r#async::open(&mut sink).await?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `BufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader).await?; +//! # Ok::<(), std::io::Error>(()) +//! # }); +//! ``` + +use crate::nar::wire; +use std::{ + io::{ + self, + ErrorKind::{InvalidInput, UnexpectedEof}, + }, + pin::Pin, +}; +use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncWrite, AsyncWriteExt}; + +/// Convenience type alias for types implementing [`AsyncWrite`]. +pub type Writer<'a> = dyn AsyncWrite + Unpin + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub async fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR).await?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + async fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data).await + } + + async fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]).await, + } + } + + /// Make this node a symlink. + pub async fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!(!target.is_empty(), "target is empty"); + debug_assert!(!target.contains(&0), "target contains null byte"); + + self.write(&wire::TOK_SYM).await?; + self.write(&target.len().to_le_bytes()).await?; + self.write(target).await?; + self.pad(target.len() as u64).await?; + self.write(&wire::TOK_PAR).await?; + Ok(()) + } + + /// Make this node a single file. + pub async fn file( + mut self, + executable: bool, + size: u64, + reader: &mut (dyn AsyncBufRead + Unpin + Send), + ) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + }) + .await?; + + self.write(&size.to_le_bytes()).await?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf().await?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n]).await?; + + need -= n as u64; + Pin::new(&mut *reader).consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().await.map(|b| !b.is_empty())? { + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size).await?; + self.write(&wire::TOK_PAR).await?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub async fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR).await?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. + pub async fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR).await?; + } + } + + self.node.write(&wire::TOK_ENT).await?; + self.node.write(&name.len().to_le_bytes()).await?; + self.node.write(name).await?; + self.node.pad(name.len() as u64).await?; + self.node.write(&wire::TOK_NOD).await?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. + pub async fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR).await?; + } + + self.node.write(&wire::TOK_PAR).await?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/mod.rs b/tvix/nix-compat/src/nar/writer/mod.rs new file mode 100644 index 0000000000..fe8ccccb37 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/mod.rs @@ -0,0 +1,9 @@ +pub use sync::*; + +pub mod sync; + +#[cfg(test)] +mod test; + +#[cfg(feature = "async")] +pub mod r#async; diff --git a/tvix/nix-compat/src/nar/writer/sync.rs b/tvix/nix-compat/src/nar/writer/sync.rs new file mode 100644 index 0000000000..6270129028 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/sync.rs @@ -0,0 +1,224 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`Write`], and content +//! can be read from any type that implementes [`BufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # use std::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::open(&mut sink)?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `BufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader)?; +//! # Ok::<(), std::io::Error>(()) +//! ``` + +use crate::nar::wire; +use std::io::{ + self, BufRead, + ErrorKind::{InvalidInput, UnexpectedEof}, + Write, +}; + +/// Convenience type alias for types implementing [`Write`]. +pub type Writer<'a> = dyn Write + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR)?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data) + } + + fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]), + } + } + + /// Make this node a symlink. + pub fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!(!target.is_empty(), "target is empty"); + debug_assert!(!target.contains(&0), "target contains null byte"); + + self.write(&wire::TOK_SYM)?; + self.write(&target.len().to_le_bytes())?; + self.write(target)?; + self.pad(target.len() as u64)?; + self.write(&wire::TOK_PAR)?; + Ok(()) + } + + /// Make this node a single file. + pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + })?; + + self.write(&size.to_le_bytes())?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf()?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n])?; + + need -= n as u64; + reader.consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().map(|b| !b.is_empty())? { + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size)?; + self.write(&wire::TOK_PAR)?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR)?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. + pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR)?; + } + } + + self.node.write(&wire::TOK_ENT)?; + self.node.write(&name.len().to_le_bytes())?; + self.node.write(name)?; + self.node.pad(name.len() as u64)?; + self.node.write(&wire::TOK_NOD)?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. + pub fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR)?; + } + + self.node.write(&wire::TOK_PAR)?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/test.rs b/tvix/nix-compat/src/nar/writer/test.rs new file mode 100644 index 0000000000..d7f18a49af --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/test.rs @@ -0,0 +1,128 @@ +use crate::nar; + +#[test] +fn symlink() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + node.symlink("/nix/store/somewhereelse".as_bytes()).unwrap(); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn symlink_async() { + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + node.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .unwrap(); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[test] +fn file() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut std::io::Cursor::new(file_contents), + ) + .unwrap(); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn file_async() { + use std::io::Cursor; + + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut Cursor::new(file_contents), + ) + .await + .unwrap(); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[test] +fn complicated() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let mut dir_node = node.directory().unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).unwrap(); + e.file(false, 0, &mut std::io::Cursor::new([])) + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).unwrap(); + let mut subdir_node = e.directory().expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .expect("subdir entry must succeed"); + e_sub.file(false, 0, &mut std::io::Cursor::new([])).unwrap(); + + // close the subdir, and then the dir, which is required. + subdir_node.close().unwrap(); + dir_node.close().unwrap(); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn complicated_async() { + use std::io::Cursor; + + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let mut dir_node = node.directory().await.unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).await.unwrap(); + e.file(false, 0, &mut Cursor::new([])) + .await + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).await.unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).await.unwrap(); + let mut subdir_node = e.directory().await.expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .await + .expect("subdir entry must succeed"); + e_sub.file(false, 0, &mut Cursor::new([])).await.unwrap(); + + // close the subdir, and then the dir, which is required. + subdir_node.close().await.unwrap(); + dir_node.close().await.unwrap(); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} |