about summary refs log tree commit diff
diff options
context:
space:
mode:
authortcmal <me@aria.rip>2024-04-24T12·59+0100
committerclbot <clbot@tvl.fyi>2024-04-25T17·29+0000
commit671bdff5dccf335ad18630a08572e0cc89205ec7 (patch)
treedde51efba0996f0411cef423ce50f4a66ffc1a15
parentf0e428db754cbd3269f92f3d5bd98fafc122a298 (diff)
test(tvix/nix-compat): add debug assertions for nar reader r/8007
Adds debug assertions to ensure that the reader's variants are upheld.
If any of the following happens, then the currently in use reader must
be abandoned:
  * A directory or file reader encounters an error
  * A directory or file reader is dropped before being fully read from
Additionally, a directory reader must not be read from again after it
has returned None.
These checks are only used when debug_assertions are on, so vanish in
release mode.

Resolves two TODO items added by edef

Change-Id: I27bd9643a632798db5351957506c166b9bd5ca4e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11508
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
Autosubmit: Aria Shrimpton <me@aria.rip>
Tested-by: BuildkiteCI
-rw-r--r--tvix/nix-compat/src/nar/reader/mod.rs238
-rw-r--r--tvix/nix-compat/src/nar/reader/test.rs272
2 files changed, 426 insertions, 84 deletions
diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs
index fa7ddc77f96c..75463a6450a5 100644
--- a/tvix/nix-compat/src/nar/reader/mod.rs
+++ b/tvix/nix-compat/src/nar/reader/mod.rs
@@ -19,10 +19,49 @@ mod test;
 
 pub type Reader<'a> = dyn BufRead + Send + 'a;
 
+struct ArchiveReader<'a, 'r> {
+    inner: &'a mut Reader<'r>,
+
+    /// In debug mode, also track when we need to abandon this archive reader.
+    /// The archive reader must be abandoned when:
+    ///   * An error is encountered at any point
+    ///   * A file or directory reader is dropped before being read entirely.
+    /// All of these checks vanish in release mode.
+    #[cfg(debug_assertions)]
+    status: ArchiveReaderStatus<'a>,
+}
+
+macro_rules! poison {
+    ($it:expr) => {
+        #[cfg(debug_assertions)]
+        {
+            $it.status.poison();
+        }
+    };
+}
+
+macro_rules! try_or_poison {
+    ($it:expr, $ex:expr) => {
+        match $ex {
+            Ok(x) => x,
+            Err(e) => {
+                poison!($it);
+                return Err(e.into());
+            }
+        }
+    };
+}
 /// Start reading a NAR file from `reader`.
 pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> {
     read::token(reader, &wire::TOK_NAR)?;
-    Node::new(reader)
+    Node::new(ArchiveReader {
+        inner: reader,
+        #[cfg(debug_assertions)]
+        status: ArchiveReaderStatus::StackTop {
+            poisoned: false,
+            ready: true,
+        },
+    })
 }
 
 pub enum Node<'a, 'r> {
@@ -41,21 +80,28 @@ impl<'a, 'r> Node<'a, 'r> {
     ///
     /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink],
     /// but is otherwise left to [DirReader] or [FileReader].
-    fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> {
-        Ok(match read::tag(reader)? {
+    #[allow(unused_mut)] // due to debug_assertions code
+    fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> {
+        Ok(match read::tag(reader.inner)? {
             wire::Node::Sym => {
-                let target = read::bytes(reader, wire::MAX_TARGET_LEN)?;
+                let target =
+                    try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN));
 
                 if target.is_empty() || target.contains(&0) {
+                    poison!(reader);
                     return Err(InvalidData.into());
                 }
 
-                read::token(reader, &wire::TOK_PAR)?;
+                try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR));
+                #[cfg(debug_assertions)]
+                {
+                    reader.status.ready_parent(); // Immediately allow reading from parent again
+                }
 
                 Node::Symlink { target }
             }
             tag @ (wire::Node::Reg | wire::Node::Exe) => {
-                let len = read::u64(reader)?;
+                let len = try_or_poison!(&mut reader, read::u64(reader.inner));
 
                 Node::File {
                     executable: tag == wire::Node::Exe,
@@ -74,10 +120,8 @@ impl<'a, 'r> Node<'a, 'r> {
 ///  * You must abandon the entire archive reader upon the first error.
 ///
 /// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF.
-///
-/// TODO(edef): enforce these in `#[cfg(debug_assertions)]`
 pub struct FileReader<'a, 'r> {
-    reader: &'a mut Reader<'r>,
+    reader: ArchiveReader<'a, 'r>,
     len: u64,
     /// Truncated original file length for padding computation.
     /// We only care about the 3 least significant bits; semantically, this is a u3.
@@ -87,12 +131,17 @@ pub struct FileReader<'a, 'r> {
 impl<'a, 'r> FileReader<'a, 'r> {
     /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE].
     /// We handle the terminating [wire::TOK_PAR] on semantic EOF.
-    fn new(reader: &'a mut Reader<'r>, len: u64) -> io::Result<Self> {
+    #[allow(unused_mut)] // due to debug_assertions code
+    fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> {
         // For zero-length files, we have to read the terminating TOK_PAR
         // immediately, since FileReader::read may never be called; we've
         // already reached semantic EOF by definition.
         if len == 0 {
-            read::token(reader, &wire::TOK_PAR)?;
+            read::token(reader.inner, &wire::TOK_PAR)?;
+            #[cfg(debug_assertions)]
+            {
+                reader.status.ready_parent();
+            }
         }
 
         Ok(Self {
@@ -121,9 +170,12 @@ impl FileReader<'_, '_> {
             return Ok(&[]);
         }
 
-        let mut buf = self.reader.fill_buf()?;
+        self.reader.check_correct();
+
+        let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf());
 
         if buf.is_empty() {
+            poison!(self.reader);
             return Err(UnexpectedEof.into());
         }
 
@@ -141,12 +193,14 @@ impl FileReader<'_, '_> {
             return Ok(());
         }
 
+        self.reader.check_correct();
+
         self.len = self
             .len
             .checked_sub(n as u64)
             .expect("consumed bytes past EOF");
 
-        self.reader.consume(n);
+        self.reader.inner.consume(n);
 
         if self.is_empty() {
             self.finish()?;
@@ -159,7 +213,7 @@ impl FileReader<'_, '_> {
     pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> {
         while !self.is_empty() {
             let buf = self.fill_buf()?;
-            let n = dst.write(buf)?;
+            let n = try_or_poison!(self.reader, dst.write(buf));
             self.consume(n)?;
         }
 
@@ -173,14 +227,17 @@ impl Read for FileReader<'_, '_> {
             return Ok(0);
         }
 
+        self.reader.check_correct();
+
         if buf.len() as u64 > self.len {
             buf = &mut buf[..self.len as usize];
         }
 
-        let n = self.reader.read(buf)?;
+        let n = try_or_poison!(self.reader, self.reader.inner.read(buf));
         self.len -= n as u64;
 
         if n == 0 {
+            poison!(self.reader);
             return Err(UnexpectedEof.into());
         }
 
@@ -200,21 +257,30 @@ impl FileReader<'_, '_> {
 
         if pad != 0 {
             let mut buf = [0; 8];
-            self.reader.read_exact(&mut buf[pad..])?;
+            try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..]));
 
             if buf != [0; 8] {
+                poison!(self.reader);
                 return Err(InvalidData.into());
             }
         }
 
-        read::token(self.reader, &wire::TOK_PAR)
+        try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
+
+        #[cfg(debug_assertions)]
+        {
+            // Done with reading this file, allow going back up the chain of readers
+            self.reader.status.ready_parent();
+        }
+
+        Ok(())
     }
 }
 
 /// A directory iterator, yielding a sequence of [Node]s.
 /// It must be fully consumed before reading further from the [DirReader] that produced it, if any.
 pub struct DirReader<'a, 'r> {
-    reader: &'a mut Reader<'r>,
+    reader: ArchiveReader<'a, 'r>,
     /// Previous directory entry name.
     /// We have to hang onto this to enforce name monotonicity.
     prev_name: Option<Vec<u8>>,
@@ -226,7 +292,7 @@ pub struct Entry<'a, 'r> {
 }
 
 impl<'a, 'r> DirReader<'a, 'r> {
-    fn new(reader: &'a mut Reader<'r>) -> Self {
+    fn new(reader: ArchiveReader<'a, 'r>) -> Self {
         Self {
             reader,
             prev_name: None,
@@ -242,23 +308,30 @@ impl<'a, 'r> DirReader<'a, 'r> {
     ///  * You must abandon the entire archive reader on the first error.
     ///  * You must abandon the directory reader upon the first [None].
     ///  * Even if you know the amount of elements up front, you must keep reading until you encounter [None].
-    ///
-    /// TODO(edef): enforce these in `#[cfg(debug_assertions)]`
     #[allow(clippy::should_implement_trait)]
-    pub fn next(&mut self) -> io::Result<Option<Entry>> {
+    pub fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
+        self.reader.check_correct();
+
         // COME FROM the previous iteration: if we've already read an entry,
         // read its terminating TOK_PAR here.
         if self.prev_name.is_some() {
-            read::token(self.reader, &wire::TOK_PAR)?;
+            try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR));
         }
 
         // Determine if there are more entries to follow
-        if let wire::Entry::None = read::tag(self.reader)? {
+        if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) {
             // We've reached the end of this directory.
+            #[cfg(debug_assertions)]
+            {
+                self.reader.status.ready_parent();
+            }
             return Ok(None);
         }
 
-        let name = read::bytes(self.reader, wire::MAX_NAME_LEN)?;
+        let name = try_or_poison!(
+            self.reader,
+            read::bytes(self.reader.inner, wire::MAX_NAME_LEN)
+        );
 
         if name.is_empty()
             || name.contains(&0)
@@ -266,6 +339,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
             || name == b"."
             || name == b".."
         {
+            poison!(self.reader);
             return Err(InvalidData.into());
         }
 
@@ -276,6 +350,7 @@ impl<'a, 'r> DirReader<'a, 'r> {
             }
             Some(prev_name) => {
                 if *prev_name >= name {
+                    poison!(self.reader);
                     return Err(InvalidData.into());
                 }
 
@@ -283,11 +358,120 @@ impl<'a, 'r> DirReader<'a, 'r> {
             }
         }
 
-        read::token(self.reader, &wire::TOK_NOD)?;
+        try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD));
 
         Ok(Some(Entry {
             name,
-            node: Node::new(&mut self.reader)?,
+            // Don't need to worry about poisoning here: Node::new will do it for us if needed
+            node: Node::new(self.reader.child())?,
         }))
     }
 }
+
+/// We use a stack of statuses to:
+///   * Share poisoned state across all objects from the same underlying reader,
+///     so we can check they are abandoned when an error occurs
+///   * Make sure only the most recently created object is read from, and is fully exhausted
+///     before anything it was created from is used again.
+#[cfg(debug_assertions)]
+enum ArchiveReaderStatus<'a> {
+    StackTop {
+        poisoned: bool,
+        ready: bool,
+    },
+    StackChild {
+        poisoned: &'a mut bool,
+        parent_ready: &'a mut bool,
+        ready: bool,
+    },
+}
+
+#[cfg(debug_assertions)]
+impl ArchiveReaderStatus<'_> {
+    /// Poison all the objects sharing the same reader, to be used when an error occurs
+    fn poison(&mut self) {
+        match self {
+            ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true,
+            ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true,
+        }
+    }
+
+    /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again.
+    fn ready_parent(&mut self) {
+        match self {
+            Self::StackTop { ready, .. } => {
+                *ready = false;
+            }
+            Self::StackChild {
+                ready,
+                parent_ready,
+                ..
+            } => {
+                *ready = false;
+                **parent_ready = true;
+            }
+        };
+    }
+
+    fn poisoned(&self) -> bool {
+        match self {
+            Self::StackTop { poisoned, .. } => *poisoned,
+            Self::StackChild { poisoned, .. } => **poisoned,
+        }
+    }
+
+    fn ready(&self) -> bool {
+        match self {
+            Self::StackTop { ready, .. } => *ready,
+            Self::StackChild { ready, .. } => *ready,
+        }
+    }
+}
+
+impl<'a, 'r> ArchiveReader<'a, 'r> {
+    /// Create a new child reader from this one.
+    /// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent`
+    fn child(&mut self) -> ArchiveReader<'_, 'r> {
+        ArchiveReader {
+            inner: self.inner,
+            #[cfg(debug_assertions)]
+            status: match &mut self.status {
+                ArchiveReaderStatus::StackTop { poisoned, ready } => {
+                    *ready = false;
+                    ArchiveReaderStatus::StackChild {
+                        poisoned,
+                        parent_ready: ready,
+                        ready: true,
+                    }
+                }
+                ArchiveReaderStatus::StackChild {
+                    poisoned, ready, ..
+                } => {
+                    *ready = false;
+                    ArchiveReaderStatus::StackChild {
+                        poisoned,
+                        parent_ready: ready,
+                        ready: true,
+                    }
+                }
+            },
+        }
+    }
+
+    /// Check the reader is in the correct status.
+    /// Only does anything when debug assertions are on.
+    #[inline(always)]
+    fn check_correct(&self) {
+        #[cfg(debug_assertions)]
+        {
+            debug_assert!(
+                !self.status.poisoned(),
+                "Archive reader used after it was meant to be abandoned!"
+            );
+            debug_assert!(
+                self.status.ready(),
+                "Non-ready archive reader used! (Should've been reading from something else)"
+            )
+        }
+    }
+}
diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs
index fd0d6a9f5afd..02dc4767c916 100644
--- a/tvix/nix-compat/src/nar/reader/test.rs
+++ b/tvix/nix-compat/src/nar/reader/test.rs
@@ -46,75 +46,233 @@ fn complicated() {
     match node {
         nar::reader::Node::Directory(mut dir_reader) => {
             // first entry is .keep, an empty regular file.
-            let entry = dir_reader
-                .next()
-                .expect("next must succeed")
-                .expect("must be some");
-
-            assert_eq!(&b".keep"[..], &entry.name);
-
-            match entry.node {
-                nar::reader::Node::File {
-                    executable,
-                    mut reader,
-                } => {
-                    assert!(!executable);
-                    assert_eq!(reader.read(&mut [0]).unwrap(), 0);
+            must_read_file(
+                ".keep",
+                dir_reader
+                    .next()
+                    .expect("next must succeed")
+                    .expect("must be some"),
+            );
+
+            // second entry is aa, a symlink to /nix/store/somewhereelse
+            must_be_symlink(
+                "aa",
+                "/nix/store/somewhereelse",
+                dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some"),
+            );
+
+            {
+                // third entry is a directory called "keep"
+                let entry = dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some");
+
+                assert_eq!(&b"keep"[..], &entry.name);
+
+                match entry.node {
+                    nar::reader::Node::Directory(mut subdir_reader) => {
+                        {
+                            // first entry is .keep, an empty regular file.
+                            let entry = subdir_reader
+                                .next()
+                                .expect("next must succeed")
+                                .expect("must be some");
+
+                            must_read_file(".keep", entry);
+                        }
+
+                        // we must read the None
+                        assert!(
+                            subdir_reader.next().expect("next must succeed").is_none(),
+                            "keep directory contains only .keep"
+                        );
+                    }
+                    _ => panic!("unexpected type for keep/.keep"),
                 }
-                _ => panic!("unexpected type for .keep"),
-            }
+            };
+
+            // reading more entries yields None (and we actually must read until this)
+            assert!(dir_reader.next().expect("must succeed").is_none());
+        }
+        _ => panic!("unexpected type"),
+    }
+}
+
+#[test]
+#[should_panic]
+fn file_read_abandoned() {
+    let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
+    let node = nar::reader::open(&mut f).unwrap();
+
+    match node {
+        nar::reader::Node::Directory(mut dir_reader) => {
+            // first entry is .keep, an empty regular file.
+            {
+                let entry = dir_reader
+                    .next()
+                    .expect("next must succeed")
+                    .expect("must be some");
+
+                assert_eq!(&b".keep"[..], &entry.name);
+                // don't bother to finish reading it.
+            };
+
+            // this should panic (not return an error), because we are meant to abandon the archive reader now.
+            assert!(dir_reader.next().expect("must succeed").is_none());
+        }
+        _ => panic!("unexpected type"),
+    }
+}
+
+#[test]
+#[should_panic]
+fn dir_read_abandoned() {
+    let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
+    let node = nar::reader::open(&mut f).unwrap();
+
+    match node {
+        nar::reader::Node::Directory(mut dir_reader) => {
+            // first entry is .keep, an empty regular file.
+            must_read_file(
+                ".keep",
+                dir_reader
+                    .next()
+                    .expect("next must succeed")
+                    .expect("must be some"),
+            );
 
             // second entry is aa, a symlink to /nix/store/somewhereelse
-            let entry = dir_reader
-                .next()
-                .expect("next must be some")
-                .expect("must be some");
+            must_be_symlink(
+                "aa",
+                "/nix/store/somewhereelse",
+                dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some"),
+            );
 
-            assert_eq!(&b"aa"[..], &entry.name);
+            {
+                // third entry is a directory called "keep"
+                let entry = dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some");
 
-            match entry.node {
-                nar::reader::Node::Symlink { target } => {
-                    assert_eq!(&b"/nix/store/somewhereelse"[..], &target);
+                assert_eq!(&b"keep"[..], &entry.name);
+
+                match entry.node {
+                    nar::reader::Node::Directory(_) => {
+                        // don't finish using it, which poisons the archive reader
+                    }
+                    _ => panic!("unexpected type for keep/.keep"),
                 }
-                _ => panic!("unexpected type for aa"),
-            }
-
-            // third entry is a directory called "keep"
-            let entry = dir_reader
-                .next()
-                .expect("next must be some")
-                .expect("must be some");
-
-            assert_eq!(&b"keep"[..], &entry.name);
-
-            match entry.node {
-                nar::reader::Node::Directory(mut subdir_reader) => {
-                    // first entry is .keep, an empty regular file.
-                    let entry = subdir_reader
-                        .next()
-                        .expect("next must succeed")
-                        .expect("must be some");
-
-                    // … it contains a single .keep, an empty regular file.
-                    assert_eq!(&b".keep"[..], &entry.name);
-
-                    match entry.node {
-                        nar::reader::Node::File {
-                            executable,
-                            mut reader,
-                        } => {
-                            assert!(!executable);
-                            assert_eq!(reader.read(&mut [0]).unwrap(), 0);
-                        }
-                        _ => panic!("unexpected type for keep/.keep"),
+            };
+
+            // this should panic, because we didn't finish reading the child subdirectory
+            assert!(dir_reader.next().expect("must succeed").is_none());
+        }
+        _ => panic!("unexpected type"),
+    }
+}
+
+#[test]
+#[should_panic]
+fn dir_read_after_none() {
+    let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar"));
+    let node = nar::reader::open(&mut f).unwrap();
+
+    match node {
+        nar::reader::Node::Directory(mut dir_reader) => {
+            // first entry is .keep, an empty regular file.
+            must_read_file(
+                ".keep",
+                dir_reader
+                    .next()
+                    .expect("next must succeed")
+                    .expect("must be some"),
+            );
+
+            // second entry is aa, a symlink to /nix/store/somewhereelse
+            must_be_symlink(
+                "aa",
+                "/nix/store/somewhereelse",
+                dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some"),
+            );
+
+            {
+                // third entry is a directory called "keep"
+                let entry = dir_reader
+                    .next()
+                    .expect("next must be some")
+                    .expect("must be some");
+
+                assert_eq!(&b"keep"[..], &entry.name);
+
+                match entry.node {
+                    nar::reader::Node::Directory(mut subdir_reader) => {
+                        // first entry is .keep, an empty regular file.
+                        must_read_file(
+                            ".keep",
+                            subdir_reader
+                                .next()
+                                .expect("next must succeed")
+                                .expect("must be some"),
+                        );
+
+                        // we must read the None
+                        assert!(
+                            subdir_reader.next().expect("next must succeed").is_none(),
+                            "keep directory contains only .keep"
+                        );
                     }
+                    _ => panic!("unexpected type for keep/.keep"),
                 }
-                _ => panic!("unexpected type for keep/.keep"),
-            }
+            };
 
             // reading more entries yields None (and we actually must read until this)
             assert!(dir_reader.next().expect("must succeed").is_none());
+
+            // this should panic, because we already got a none so we're meant to stop.
+            dir_reader.next().unwrap();
+            unreachable!()
         }
         _ => panic!("unexpected type"),
     }
 }
+
+fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) {
+    assert_eq!(name.as_bytes(), &entry.name);
+
+    match entry.node {
+        nar::reader::Node::File {
+            executable,
+            mut reader,
+        } => {
+            assert!(!executable);
+            assert_eq!(reader.read(&mut [0]).unwrap(), 0);
+        }
+        _ => panic!("unexpected type for {}", name),
+    }
+}
+
+fn must_be_symlink(
+    name: &'static str,
+    exp_target: &'static str,
+    entry: nar::reader::Entry<'_, '_>,
+) {
+    assert_eq!(name.as_bytes(), &entry.name);
+
+    match entry.node {
+        nar::reader::Node::Symlink { target } => {
+            assert_eq!(exp_target.as_bytes(), &target);
+        }
+        _ => panic!("unexpected type for {}", name),
+    }
+}