From 671bdff5dccf335ad18630a08572e0cc89205ec7 Mon Sep 17 00:00:00 2001 From: tcmal Date: Wed, 24 Apr 2024 13:59:11 +0100 Subject: test(tvix/nix-compat): add debug assertions for nar reader Adds debug assertions to ensure that the reader's variants are upheld. If any of the following happens, then the currently in use reader must be abandoned: * A directory or file reader encounters an error * A directory or file reader is dropped before being fully read from Additionally, a directory reader must not be read from again after it has returned None. These checks are only used when debug_assertions are on, so vanish in release mode. Resolves two TODO items added by edef Change-Id: I27bd9643a632798db5351957506c166b9bd5ca4e Reviewed-on: https://cl.tvl.fyi/c/depot/+/11508 Reviewed-by: flokli Reviewed-by: edef Autosubmit: Aria Shrimpton Tested-by: BuildkiteCI --- tvix/nix-compat/src/nar/reader/mod.rs | 238 +++++++++++++++++++++++++---- tvix/nix-compat/src/nar/reader/test.rs | 272 ++++++++++++++++++++++++++------- 2 files changed, 426 insertions(+), 84 deletions(-) diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index fa7ddc77f96c..75463a6450a5 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -19,10 +19,49 @@ mod test; pub type Reader<'a> = dyn BufRead + Send + 'a; +struct ArchiveReader<'a, 'r> { + inner: &'a mut Reader<'r>, + + /// In debug mode, also track when we need to abandon this archive reader. + /// The archive reader must be abandoned when: + /// * An error is encountered at any point + /// * A file or directory reader is dropped before being read entirely. + /// All of these checks vanish in release mode. + #[cfg(debug_assertions)] + status: ArchiveReaderStatus<'a>, +} + +macro_rules! poison { + ($it:expr) => { + #[cfg(debug_assertions)] + { + $it.status.poison(); + } + }; +} + +macro_rules! try_or_poison { + ($it:expr, $ex:expr) => { + match $ex { + Ok(x) => x, + Err(e) => { + poison!($it); + return Err(e.into()); + } + } + }; +} /// Start reading a NAR file from `reader`. pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result> { read::token(reader, &wire::TOK_NAR)?; - Node::new(reader) + Node::new(ArchiveReader { + inner: reader, + #[cfg(debug_assertions)] + status: ArchiveReaderStatus::StackTop { + poisoned: false, + ready: true, + }, + }) } pub enum Node<'a, 'r> { @@ -41,21 +80,28 @@ impl<'a, 'r> Node<'a, 'r> { /// /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], /// but is otherwise left to [DirReader] or [FileReader]. - fn new(reader: &'a mut Reader<'r>) -> io::Result { - Ok(match read::tag(reader)? { + #[allow(unused_mut)] // due to debug_assertions code + fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result { + Ok(match read::tag(reader.inner)? { wire::Node::Sym => { - let target = read::bytes(reader, wire::MAX_TARGET_LEN)?; + let target = + try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN)); if target.is_empty() || target.contains(&0) { + poison!(reader); return Err(InvalidData.into()); } - read::token(reader, &wire::TOK_PAR)?; + try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR)); + #[cfg(debug_assertions)] + { + reader.status.ready_parent(); // Immediately allow reading from parent again + } Node::Symlink { target } } tag @ (wire::Node::Reg | wire::Node::Exe) => { - let len = read::u64(reader)?; + let len = try_or_poison!(&mut reader, read::u64(reader.inner)); Node::File { executable: tag == wire::Node::Exe, @@ -74,10 +120,8 @@ impl<'a, 'r> Node<'a, 'r> { /// * You must abandon the entire archive reader upon the first error. /// /// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. -/// -/// TODO(edef): enforce these in `#[cfg(debug_assertions)]` pub struct FileReader<'a, 'r> { - reader: &'a mut Reader<'r>, + reader: ArchiveReader<'a, 'r>, len: u64, /// Truncated original file length for padding computation. /// We only care about the 3 least significant bits; semantically, this is a u3. @@ -87,12 +131,17 @@ pub struct FileReader<'a, 'r> { impl<'a, 'r> FileReader<'a, 'r> { /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. /// We handle the terminating [wire::TOK_PAR] on semantic EOF. - fn new(reader: &'a mut Reader<'r>, len: u64) -> io::Result { + #[allow(unused_mut)] // due to debug_assertions code + fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result { // For zero-length files, we have to read the terminating TOK_PAR // immediately, since FileReader::read may never be called; we've // already reached semantic EOF by definition. if len == 0 { - read::token(reader, &wire::TOK_PAR)?; + read::token(reader.inner, &wire::TOK_PAR)?; + #[cfg(debug_assertions)] + { + reader.status.ready_parent(); + } } Ok(Self { @@ -121,9 +170,12 @@ impl FileReader<'_, '_> { return Ok(&[]); } - let mut buf = self.reader.fill_buf()?; + self.reader.check_correct(); + + let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf()); if buf.is_empty() { + poison!(self.reader); return Err(UnexpectedEof.into()); } @@ -141,12 +193,14 @@ impl FileReader<'_, '_> { return Ok(()); } + self.reader.check_correct(); + self.len = self .len .checked_sub(n as u64) .expect("consumed bytes past EOF"); - self.reader.consume(n); + self.reader.inner.consume(n); if self.is_empty() { self.finish()?; @@ -159,7 +213,7 @@ impl FileReader<'_, '_> { pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> { while !self.is_empty() { let buf = self.fill_buf()?; - let n = dst.write(buf)?; + let n = try_or_poison!(self.reader, dst.write(buf)); self.consume(n)?; } @@ -173,14 +227,17 @@ impl Read for FileReader<'_, '_> { return Ok(0); } + self.reader.check_correct(); + if buf.len() as u64 > self.len { buf = &mut buf[..self.len as usize]; } - let n = self.reader.read(buf)?; + let n = try_or_poison!(self.reader, self.reader.inner.read(buf)); self.len -= n as u64; if n == 0 { + poison!(self.reader); return Err(UnexpectedEof.into()); } @@ -200,21 +257,30 @@ impl FileReader<'_, '_> { if pad != 0 { let mut buf = [0; 8]; - self.reader.read_exact(&mut buf[pad..])?; + try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..])); if buf != [0; 8] { + poison!(self.reader); return Err(InvalidData.into()); } } - read::token(self.reader, &wire::TOK_PAR) + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); + + #[cfg(debug_assertions)] + { + // Done with reading this file, allow going back up the chain of readers + self.reader.status.ready_parent(); + } + + Ok(()) } } /// A directory iterator, yielding a sequence of [Node]s. /// It must be fully consumed before reading further from the [DirReader] that produced it, if any. pub struct DirReader<'a, 'r> { - reader: &'a mut Reader<'r>, + reader: ArchiveReader<'a, 'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. prev_name: Option>, @@ -226,7 +292,7 @@ pub struct Entry<'a, 'r> { } impl<'a, 'r> DirReader<'a, 'r> { - fn new(reader: &'a mut Reader<'r>) -> Self { + fn new(reader: ArchiveReader<'a, 'r>) -> Self { Self { reader, prev_name: None, @@ -242,23 +308,30 @@ impl<'a, 'r> DirReader<'a, 'r> { /// * You must abandon the entire archive reader on the first error. /// * You must abandon the directory reader upon the first [None]. /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. - /// - /// TODO(edef): enforce these in `#[cfg(debug_assertions)]` #[allow(clippy::should_implement_trait)] - pub fn next(&mut self) -> io::Result> { + pub fn next(&mut self) -> io::Result>> { + self.reader.check_correct(); + // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. if self.prev_name.is_some() { - read::token(self.reader, &wire::TOK_PAR)?; + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); } // Determine if there are more entries to follow - if let wire::Entry::None = read::tag(self.reader)? { + if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) { // We've reached the end of this directory. + #[cfg(debug_assertions)] + { + self.reader.status.ready_parent(); + } return Ok(None); } - let name = read::bytes(self.reader, wire::MAX_NAME_LEN)?; + let name = try_or_poison!( + self.reader, + read::bytes(self.reader.inner, wire::MAX_NAME_LEN) + ); if name.is_empty() || name.contains(&0) @@ -266,6 +339,7 @@ impl<'a, 'r> DirReader<'a, 'r> { || name == b"." || name == b".." { + poison!(self.reader); return Err(InvalidData.into()); } @@ -276,6 +350,7 @@ impl<'a, 'r> DirReader<'a, 'r> { } Some(prev_name) => { if *prev_name >= name { + poison!(self.reader); return Err(InvalidData.into()); } @@ -283,11 +358,120 @@ impl<'a, 'r> DirReader<'a, 'r> { } } - read::token(self.reader, &wire::TOK_NOD)?; + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); Ok(Some(Entry { name, - node: Node::new(&mut self.reader)?, + // Don't need to worry about poisoning here: Node::new will do it for us if needed + node: Node::new(self.reader.child())?, })) } } + +/// We use a stack of statuses to: +/// * Share poisoned state across all objects from the same underlying reader, +/// so we can check they are abandoned when an error occurs +/// * Make sure only the most recently created object is read from, and is fully exhausted +/// before anything it was created from is used again. +#[cfg(debug_assertions)] +enum ArchiveReaderStatus<'a> { + StackTop { + poisoned: bool, + ready: bool, + }, + StackChild { + poisoned: &'a mut bool, + parent_ready: &'a mut bool, + ready: bool, + }, +} + +#[cfg(debug_assertions)] +impl ArchiveReaderStatus<'_> { + /// Poison all the objects sharing the same reader, to be used when an error occurs + fn poison(&mut self) { + match self { + ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true, + ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true, + } + } + + /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again. + fn ready_parent(&mut self) { + match self { + Self::StackTop { ready, .. } => { + *ready = false; + } + Self::StackChild { + ready, + parent_ready, + .. + } => { + *ready = false; + **parent_ready = true; + } + }; + } + + fn poisoned(&self) -> bool { + match self { + Self::StackTop { poisoned, .. } => *poisoned, + Self::StackChild { poisoned, .. } => **poisoned, + } + } + + fn ready(&self) -> bool { + match self { + Self::StackTop { ready, .. } => *ready, + Self::StackChild { ready, .. } => *ready, + } + } +} + +impl<'a, 'r> ArchiveReader<'a, 'r> { + /// Create a new child reader from this one. + /// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent` + fn child(&mut self) -> ArchiveReader<'_, 'r> { + ArchiveReader { + inner: self.inner, + #[cfg(debug_assertions)] + status: match &mut self.status { + ArchiveReaderStatus::StackTop { poisoned, ready } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + ArchiveReaderStatus::StackChild { + poisoned, ready, .. + } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + }, + } + } + + /// Check the reader is in the correct status. + /// Only does anything when debug assertions are on. + #[inline(always)] + fn check_correct(&self) { + #[cfg(debug_assertions)] + { + debug_assert!( + !self.status.poisoned(), + "Archive reader used after it was meant to be abandoned!" + ); + debug_assert!( + self.status.ready(), + "Non-ready archive reader used! (Should've been reading from something else)" + ) + } + } +} diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs index fd0d6a9f5afd..02dc4767c916 100644 --- a/tvix/nix-compat/src/nar/reader/test.rs +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -46,75 +46,233 @@ fn complicated() { match node { nar::reader::Node::Directory(mut dir_reader) => { // first entry is .keep, an empty regular file. - let entry = dir_reader - .next() - .expect("next must succeed") - .expect("must be some"); - - assert_eq!(&b".keep"[..], &entry.name); - - match entry.node { - nar::reader::Node::File { - executable, - mut reader, - } => { - assert!(!executable); - assert_eq!(reader.read(&mut [0]).unwrap(), 0); + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(&b"keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry); + } + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), } - _ => panic!("unexpected type for .keep"), - } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(&b".keep"[..], &entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); // second entry is aa, a symlink to /nix/store/somewhereelse - let entry = dir_reader - .next() - .expect("next must be some") - .expect("must be some"); + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); - assert_eq!(&b"aa"[..], &entry.name); + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); - match entry.node { - nar::reader::Node::Symlink { target } => { - assert_eq!(&b"/nix/store/somewhereelse"[..], &target); + assert_eq!(&b"keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), } - _ => panic!("unexpected type for aa"), - } - - // third entry is a directory called "keep" - let entry = dir_reader - .next() - .expect("next must be some") - .expect("must be some"); - - assert_eq!(&b"keep"[..], &entry.name); - - match entry.node { - nar::reader::Node::Directory(mut subdir_reader) => { - // first entry is .keep, an empty regular file. - let entry = subdir_reader - .next() - .expect("next must succeed") - .expect("must be some"); - - // … it contains a single .keep, an empty regular file. - assert_eq!(&b".keep"[..], &entry.name); - - match entry.node { - nar::reader::Node::File { - executable, - mut reader, - } => { - assert!(!executable); - assert_eq!(reader.read(&mut [0]).unwrap(), 0); - } - _ => panic!("unexpected type for keep/.keep"), + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(&b"keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); } + _ => panic!("unexpected type for keep/.keep"), } - _ => panic!("unexpected type for keep/.keep"), - } + }; // reading more entries yields None (and we actually must read until this) assert!(dir_reader.next().expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. + dir_reader.next().unwrap(); + unreachable!() } _ => panic!("unexpected type"), } } + +fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), &entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), &entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} -- cgit 1.4.1