tvix/nix-compat/src/nar/reader/async/mod.rs - depot

use std::{
    mem::MaybeUninit,
    pin::Pin,
    task::{self, Poll},
};

use tokio::io::{self, AsyncBufRead, AsyncRead, ErrorKind::InvalidData};

// Required reading for understanding this module.
use crate::{
    nar::{self, wire::PadPar},
    wire::{self, BytesReader},
};

mod read;
#[cfg(test)]
mod test;

pub type Reader<'a> = dyn AsyncBufRead + Unpin + Send + 'a;

/// Start reading a NAR file from `reader`.
pub async fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> {
    read::token(reader, &nar::wire::TOK_NAR).await?;
    Node::new(reader).await
}

pub enum Node<'a, 'r: 'a> {
    Symlink {
        target: Vec<u8>,
    },
    File {
        executable: bool,
        reader: FileReader<'a, 'r>,
    },
    Directory(DirReader<'a, 'r>),
}

impl<'a, 'r: 'a> Node<'a, 'r> {
    /// Start reading a [Node], matching the next [wire::Node].
    ///
    /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink],
    /// but is otherwise left to [DirReader] or [BytesReader].
    async fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> {
        Ok(match read::tag(reader).await? {
            nar::wire::Node::Sym => {
                let target = wire::read_bytes(reader, 1..=nar::wire::MAX_TARGET_LEN).await?;

                if target.contains(&0) {
                    return Err(InvalidData.into());
                }

                read::token(reader, &nar::wire::TOK_PAR).await?;

                Node::Symlink { target }
            }
            tag @ (nar::wire::Node::Reg | nar::wire::Node::Exe) => Node::File {
                executable: tag == nar::wire::Node::Exe,
                reader: FileReader {
                    inner: BytesReader::new_internal(reader, ..).await?,
                },
            },
            nar::wire::Node::Dir => Node::Directory(DirReader::new(reader)),
        })
    }
}

/// File contents, readable through the [AsyncRead] trait.
///
/// It comes with some caveats:
///  * You must always read the entire file, unless you intend to abandon the entire archive reader.
///  * You must abandon the entire archive reader upon the first error.
///
/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF.
pub struct FileReader<'a, 'r> {
    inner: BytesReader<&'a mut Reader<'r>, PadPar>,
}

impl<'a, 'r> FileReader<'a, 'r> {
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    pub fn len(&self) -> u64 {
        self.inner.len()
    }
}

impl<'a, 'r> AsyncRead for FileReader<'a, 'r> {
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut task::Context,
        buf: &mut io::ReadBuf,
    ) -> Poll<io::Result<()>> {
        Pin::new(&mut self.get_mut().inner).poll_read(cx, buf)
    }
}

impl<'a, 'r> AsyncBufRead for FileReader<'a, 'r> {
    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> {
        Pin::new(&mut self.get_mut().inner).poll_fill_buf(cx)
    }

    fn consume(self: Pin<&mut Self>, amt: usize) {
        Pin::new(&mut self.get_mut().inner).consume(amt)
    }
}

/// A directory iterator, yielding a sequence of [Node]s.
/// It must be fully consumed before reading further from the [DirReader] that produced it, if any.
pub struct DirReader<'a, 'r> {
    reader: &'a mut Reader<'r>,
    /// Previous directory entry name.
    /// We have to hang onto this to enforce name monotonicity.
    prev_name: Vec<u8>,
}

pub struct Entry<'a, 'r> {
    pub name: &'a [u8],
    pub node: Node<'a, 'r>,
}

impl<'a, 'r> DirReader<'a, 'r> {
    fn new(reader: &'a mut Reader<'r>) -> Self {
        Self {
            reader,
            prev_name: vec![],
        }
    }

    /// Read the next [Entry] from the directory.
    ///
    /// We explicitly don't implement [Iterator], since treating this as
    /// a regular Rust iterator will surely lead you astray.
    ///
    ///  * You must always consume the entire iterator, unless you abandon the entire archive reader.
    ///  * You must abandon the entire archive reader on the first error.
    ///  * You must abandon the directory reader upon the first [None].
    ///  * Even if you know the amount of elements up front, you must keep reading until you encounter [None].
    pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> {
        // COME FROM the previous iteration: if we've already read an entry,
        // read its terminating TOK_PAR here.
        if !self.prev_name.is_empty() {
            read::token(self.reader, &nar::wire::TOK_PAR).await?;
        }

        if let nar::wire::Entry::None = read::tag(self.reader).await? {
            return Ok(None);
        }

        let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1];
        let name =
            wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?;

        if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." {
            return Err(InvalidData.into());
        }

        // Enforce strict monotonicity of directory entry names.
        if &self.prev_name[..] >= name {
            return Err(InvalidData.into());
        }

        self.prev_name.clear();
        self.prev_name.extend_from_slice(name);

        read::token(self.reader, &nar::wire::TOK_NOD).await?;

        Ok(Some(Entry {
            name: &self.prev_name,
            node: Node::new(self.reader).await?,
        }))
    }
}