diff options
author | edef <edef@edef.eu> | 2024-05-08T07·13+0000 |
---|---|---|
committer | edef <edef@edef.eu> | 2024-05-08T15·30+0000 |
commit | 31d73cd443311c5ef4bfbd8c2b66e3b9691340ec (patch) | |
tree | 906c13131daf87bcc726db9453c8226d68a3728b | |
parent | 17a7dac94f29d8151ecebfbf11d5b83cf0c4f415 (diff) |
refactor(nix-compat/nar/reader): reuse prev_name allocation r/8095
We reuse the prev_name allocation for Entry, instead of allocating and returning a separate Vec. We encode the `prev_name: None` case as an empty vector, since we don't allow empty names anyway, and the sorting is equivalent. Change-Id: I975b37ff873805f5ff099bc82128706891052247 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11607 Reviewed-by: Brian Olsen <me@griff.name> Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/mod.rs | 31 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/async/test.rs | 12 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/mod.rs | 31 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/read.rs | 32 | ||||
-rw-r--r-- | tvix/nix-compat/src/nar/reader/test.rs | 12 | ||||
-rw-r--r-- | tvix/nix-compat/src/wire/bytes/mod.rs | 1 | ||||
-rw-r--r-- | tvix/store/src/nar/import.rs | 2 | ||||
-rw-r--r-- | tvix/tools/crunch-v2/src/main.rs | 2 |
8 files changed, 73 insertions, 50 deletions
diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs index 048283a67cb5..0808fba38c47 100644 --- a/tvix/nix-compat/src/nar/reader/async/mod.rs +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -1,4 +1,5 @@ use std::{ + mem::MaybeUninit, pin::Pin, task::{self, Poll}, }; @@ -110,11 +111,11 @@ pub struct DirReader<'a, 'r> { reader: &'a mut Reader<'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option<Vec<u8>>, + prev_name: Vec<u8>, } pub struct Entry<'a, 'r> { - pub name: Vec<u8>, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -122,7 +123,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: &'a mut Reader<'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -138,7 +139,7 @@ impl<'a, 'r> DirReader<'a, 'r> { pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { read::token(self.reader, &nar::wire::TOK_PAR).await?; } @@ -146,30 +147,26 @@ impl<'a, 'r> DirReader<'a, 'r> { return Ok(None); } - let name = wire::read_bytes(self.reader, 1..=nar::wire::MAX_NAME_LEN).await?; + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { return Err(InvalidData.into()); } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + read::token(self.reader, &nar::wire::TOK_NOD).await?; Ok(Some(Entry { - name, + name: &self.prev_name, node: Node::new(self.reader).await?, })) } diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs index 58bb651fca21..7bc1f8942f50 100644 --- a/tvix/nix-compat/src/nar/reader/async/test.rs +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -80,7 +80,7 @@ async fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -133,7 +133,7 @@ async fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. }; @@ -183,7 +183,7 @@ async fn dir_read_abandoned() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(_) => { @@ -239,7 +239,7 @@ async fn dir_read_after_none() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -280,7 +280,7 @@ async fn dir_read_after_none() { } async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::File { @@ -299,7 +299,7 @@ fn must_be_symlink( exp_target: &'static str, entry: nar::reader::Entry<'_, '_>, ) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::Symlink { target } => { diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs index bddf1750800c..9e9237ead363 100644 --- a/tvix/nix-compat/src/nar/reader/mod.rs +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -264,11 +264,11 @@ pub struct DirReader<'a, 'r> { reader: ArchiveReader<'a, 'r>, /// Previous directory entry name. /// We have to hang onto this to enforce name monotonicity. - prev_name: Option<Vec<u8>>, + prev_name: Vec<u8>, } pub struct Entry<'a, 'r> { - pub name: Vec<u8>, + pub name: &'a [u8], pub node: Node<'a, 'r>, } @@ -276,7 +276,7 @@ impl<'a, 'r> DirReader<'a, 'r> { fn new(reader: ArchiveReader<'a, 'r>) -> Self { Self { reader, - prev_name: None, + prev_name: vec![], } } @@ -295,7 +295,7 @@ impl<'a, 'r> DirReader<'a, 'r> { // COME FROM the previous iteration: if we've already read an entry, // read its terminating TOK_PAR here. - if self.prev_name.is_some() { + if !self.prev_name.is_empty() { try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); } @@ -306,9 +306,10 @@ impl<'a, 'r> DirReader<'a, 'r> { return Ok(None); } + let mut name = [0; wire::MAX_NAME_LEN + 1]; let name = try_or_poison!( self.reader, - read::bytes(self.reader.inner, wire::MAX_NAME_LEN) + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) ); if name.is_empty() @@ -322,24 +323,18 @@ impl<'a, 'r> DirReader<'a, 'r> { } // Enforce strict monotonicity of directory entry names. - match &mut self.prev_name { - None => { - self.prev_name = Some(name.clone()); - } - Some(prev_name) => { - if *prev_name >= name { - self.reader.status.poison(); - return Err(InvalidData.into()); - } - - name[..].clone_into(prev_name); - } + if &self.prev_name[..] >= name { + self.reader.status.poison(); + return Err(InvalidData.into()); } + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); Ok(Some(Entry { - name, + name: &self.prev_name, // Don't need to worry about poisoning here: Node::new will do it for us if needed node: Node::new(self.reader.child())?, })) diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs index 1ce161376424..9938581f2a2e 100644 --- a/tvix/nix-compat/src/nar/reader/read.rs +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -15,6 +15,38 @@ pub fn u64(reader: &mut Reader) -> io::Result<u64> { Ok(u64::from_le_bytes(buf)) } +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + /// Consume a byte string of up to `max_len` bytes from the reader. pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { assert!(max_len <= isize::MAX as usize); diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs index 02dc4767c916..63e4fb289ffc 100644 --- a/tvix/nix-compat/src/nar/reader/test.rs +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -71,7 +71,7 @@ fn complicated() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -117,7 +117,7 @@ fn file_read_abandoned() { .expect("next must succeed") .expect("must be some"); - assert_eq!(&b".keep"[..], &entry.name); + assert_eq!(b".keep", entry.name); // don't bother to finish reading it. }; @@ -162,7 +162,7 @@ fn dir_read_abandoned() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(_) => { @@ -213,7 +213,7 @@ fn dir_read_after_none() { .expect("next must be some") .expect("must be some"); - assert_eq!(&b"keep"[..], &entry.name); + assert_eq!(b"keep", entry.name); match entry.node { nar::reader::Node::Directory(mut subdir_reader) => { @@ -248,7 +248,7 @@ fn dir_read_after_none() { } fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::File { @@ -267,7 +267,7 @@ fn must_be_symlink( exp_target: &'static str, entry: nar::reader::Entry<'_, '_>, ) { - assert_eq!(name.as_bytes(), &entry.name); + assert_eq!(name.as_bytes(), entry.name); match entry.node { nar::reader::Node::Symlink { target } => { diff --git a/tvix/nix-compat/src/wire/bytes/mod.rs b/tvix/nix-compat/src/wire/bytes/mod.rs index f48b5000a51d..2ed071e37985 100644 --- a/tvix/nix-compat/src/wire/bytes/mod.rs +++ b/tvix/nix-compat/src/wire/bytes/mod.rs @@ -82,7 +82,6 @@ where Ok(buf) } -#[allow(dead_code)] pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>( reader: &mut R, buf: &'a mut [MaybeUninit<u8>; N], diff --git a/tvix/store/src/nar/import.rs b/tvix/store/src/nar/import.rs index 7a9f1231e76a..3d7c50014aa8 100644 --- a/tvix/store/src/nar/import.rs +++ b/tvix/store/src/nar/import.rs @@ -87,7 +87,7 @@ where let mut path = path.clone(); // valid NAR names are valid castore names - path.try_push(&entry.name) + path.try_push(entry.name) .expect("Tvix bug: failed to join name"); let entry = Box::pin(produce_nar_inner( diff --git a/tvix/tools/crunch-v2/src/main.rs b/tvix/tools/crunch-v2/src/main.rs index a5d538f6beac..5be8c28e293f 100644 --- a/tvix/tools/crunch-v2/src/main.rs +++ b/tvix/tools/crunch-v2/src/main.rs @@ -147,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec<u8>, avg_chunk_size: u32) -> Result<proto:: let mut symlinks = vec![]; while let Some(node) = reader.next()? { - match ingest(node.node, node.name, avg_chunk_size)? { + match ingest(node.node, node.name.to_owned(), avg_chunk_size)? { proto::path::Node::Directory(node) => { directories.push(node); } |