about summary refs log tree commit diff
diff options
context:
space:
mode:
author edef <edef@edef.eu> 2022-12-04T16·37+0000
committer clbot <clbot@tvl.fyi> 2022-12-20T15·59+0000
commit 15e98314cc1b8a35de70dbb1cfdb81d3ef7e7e9a (patch)
tree 492c3b4716aa616e1e611ce8858d72457a6e0b4f
parent de4dd15eae5a44fc4cac0308328386292d1a890f (diff)
feat(tvix/nar): init writer r/5428
Change-Id: I101f35840cb14c58af708c91e59f997f6d167f75
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7505
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Autosubmit: tazjin <tazjin@tvl.su>
-rw-r--r-- tvix/nar/Cargo.lock 7
-rw-r--r-- tvix/nar/Cargo.toml 4
-rw-r--r-- tvix/nar/src/lib.rs 157
-rw-r--r-- tvix/nar/src/wire.rs 46
4 files changed, 214 insertions, 0 deletions
diff --git a/tvix/nar/Cargo.lock b/tvix/nar/Cargo.lock
new file mode 100644
index 000000000000..1288e13db8de
--- /dev/null
+++ b/tvix/nar/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "tvix_nar"
+version = "0.0.0"
diff --git a/tvix/nar/Cargo.toml b/tvix/nar/Cargo.toml
new file mode 100644
index 000000000000..f27e9a0cb8a7
--- /dev/null
+++ b/tvix/nar/Cargo.toml
@@ -0,0 +1,4 @@
+[package]
+name = "tvix_nar"
+version = "0.0.0"
+edition = "2021"
diff --git a/tvix/nar/src/lib.rs b/tvix/nar/src/lib.rs
new file mode 100644
index 000000000000..89cf6fdb1931
--- /dev/null
+++ b/tvix/nar/src/lib.rs
@@ -0,0 +1,157 @@
+use std::io::{self, BufRead, ErrorKind::UnexpectedEof, Write};
+
+mod wire;
+
+pub type Writer<'a> = dyn Write + 'a; // Type-erased sink that all archive bytes are written to.
+
+pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> {
+    let mut node = Node { writer }; // top-level node of a new archive on `writer`
+    node.write(&wire::TOK_NAR)?; // archive header goes out before any node content
+    Ok(node) // caller finishes it via `symlink`, `file` or `directory`
+}
+
+pub struct Node<'a, 'w: 'a> { // Handle for one archive node whose kind has not been written yet.
+    writer: &'a mut Writer<'w>, // borrowed output sink
+}
+
+impl<'a, 'w> Node<'a, 'w> { // `symlink`/`file`/`directory` take `self`: a node is written at most once.
+    fn write(&mut self, data: &[u8]) -> io::Result<()> { // Forward raw bytes to the writer.
+        self.writer.write_all(data)
+    }
+
+    fn pad(&mut self, n: u64) -> io::Result<()> { // Zero-pad an `n`-byte payload to a multiple of 8.
+        match (n & 7) as usize {
+            0 => Ok(()), // already aligned, nothing to emit
+            n => self.write(&[0; 8][n..]), // emits 8 - n zero bytes
+        }
+    }
+
+    pub fn symlink(mut self, target: &str) -> io::Result<()> { // Write a symlink node.
+        debug_assert!(
+            target.len() <= wire::MAX_TARGET_LEN,
+            "target.len() > {}",
+            wire::MAX_TARGET_LEN
+        );
+        debug_assert!(
+            !target.contains('\0'),
+            "invalid target characters: {target:?}"
+        );
+        debug_assert!(!target.is_empty(), "empty target"); // all three checks: debug builds only
+
+        self.write(&wire::TOK_SYM)?;
+        self.write(&(target.len() as u64).to_le_bytes())?; // 8-byte length on every target
+        self.write(target.as_bytes())?;
+        self.pad(target.len() as u64)?;
+        self.write(&wire::TOK_PAR)?; // close the node
+        Ok(())
+    }
+
+    pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> {
+        self.write(if executable { // pick the executable or the regular file header
+            &wire::TOK_EXE
+        } else {
+            &wire::TOK_REG
+        })?;
+
+        self.write(&size.to_le_bytes())?; // declared content length, 8 bytes little-endian
+
+        let mut need = size; // content bytes still to be copied from `reader`
+        while need != 0 {
+            let data = reader.fill_buf()?;
+
+            if data.is_empty() {
+                return Err(UnexpectedEof.into()); // reader ended before `size` bytes
+            }
+
+            let n = need.min(data.len() as u64) as usize; // never copy past `size`
+            self.write(&data[..n])?;
+
+            need -= n as u64;
+            reader.consume(n); // only what was written is consumed from the reader
+        }
+
+        self.pad(size)?;
+        self.write(&wire::TOK_PAR)?; // close the node
+
+        Ok(())
+    }
+
+    pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { // Begin a directory node.
+        self.write(&wire::TOK_DIR)?;
+        Ok(Directory::new(self)) // entries are added through the returned `Directory`
+    }
+}
+
+#[cfg(debug_assertions)]
+type Name = String; // with debug assertions: keeps the actual entry name
+#[cfg(not(debug_assertions))]
+type Name = (); // without debug assertions: stores nothing
+
+fn into_name(_name: &str) -> Name { // Convert an entry name into whatever `Name` stores.
+    #[cfg(debug_assertions)]
+    _name.to_owned() // compiled out without debug assertions, leaving a body that yields `()`
+}
+
+pub struct Directory<'a, 'w> { // State of a directory node whose entries are being written.
+    node: Node<'a, 'w>, // node whose writer receives the entries
+    prev_name: Option<Name>, // `None` until the first entry; feeds the debug-only ordering check
+}
+
+impl<'a, 'w> Directory<'a, 'w> {
+    fn new(node: Node<'a, 'w>) -> Self { // Start with no entry written yet.
+        Self {
+            node,
+            prev_name: None,
+        }
+    }
+
+    pub fn entry(&mut self, name: &str) -> io::Result<Node<'_, 'w>> { // Begin entry `name`.
+        debug_assert!(
+            name.len() <= wire::MAX_NAME_LEN,
+            "name.len() > {}",
+            wire::MAX_NAME_LEN
+        );
+        debug_assert!(!["", ".", ".."].contains(&name), "invalid name: {name:?}");
+        debug_assert!(
+            !name.contains(['/', '\0']),
+            "invalid name characters: {name:?}"
+        );
+
+        match self.prev_name {
+            None => { // first entry: there is no previous entry to close
+                self.prev_name = Some(into_name(name));
+            }
+            Some(ref mut _prev_name) => {
+                #[cfg(debug_assertions)]
+                { // debug builds require strictly ascending entry names
+                    assert!(
+                        &**_prev_name < name,
+                        "misordered names: {_prev_name:?} >= {name:?}"
+                    );
+                    _prev_name.clear();
+                    _prev_name.push_str(name);
+                }
+                self.node.write(&wire::TOK_PAR)?; // close the previous entry
+            }
+        }
+
+        self.node.write(&wire::TOK_ENT)?;
+        self.node.write(&(name.len() as u64).to_le_bytes())?; // 8-byte length on every target
+        self.node.write(name.as_bytes())?;
+        self.node.pad(name.len() as u64)?;
+        self.node.write(&wire::TOK_NOD)?;
+
+        Ok(Node { // child node reborrows the same writer for the duration of the `&mut self` borrow
+            writer: &mut *self.node.writer,
+        })
+    }
+
+    pub fn close(mut self) -> io::Result<()> { // Finish the directory node.
+        if self.prev_name.is_some() {
+            self.node.write(&wire::TOK_PAR)?; // close the last entry, if one was written
+        }
+
+        self.node.write(&wire::TOK_PAR)?; // close the directory itself
+        Ok(())
+    }
+}
diff --git a/tvix/nar/src/wire.rs b/tvix/nar/src/wire.rs
new file mode 100644
index 000000000000..98581ae3aa7c
--- /dev/null
+++ b/tvix/nar/src/wire.rs
@@ -0,0 +1,46 @@
+pub const MAX_NAME_LEN: usize = 255; // largest directory entry name accepted, in bytes
+pub const MAX_TARGET_LEN: usize = 4095; // largest symlink target accepted, in bytes
+
+#[cfg(test)]
+fn token(xs: &[&str]) -> Vec<u8> { // Test helper: encode each string as length, bytes, padding.
+    let mut out = vec![];
+    for x in xs {
+        let len = x.len() as u64;
+        out.extend_from_slice(&len.to_le_bytes()); // 8-byte little-endian length prefix
+        out.extend_from_slice(x.as_bytes());
+
+        let n = x.len() & 7;
+        if n != 0 { // zero-fill up to the next multiple of 8
+            const ZERO: [u8; 8] = [0; 8];
+            out.extend_from_slice(&ZERO[n..]);
+        }
+    }
+    out
+}
+
+pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; // "nix-archive-1" "(" "type"
+pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; // "regular" "contents"
+pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; // "regular" "executable" "" "contents"
+pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; // "symlink" "target"
+pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; // "directory"
+pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; // "entry" "(" "name"
+pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; // "node" "(" "type"
+pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; // ")"
+
+#[test]
+fn tokens() { // Checks each hand-written token constant against the `token` encoder.
+    let cases: &[(&[u8], &[&str])] = &[
+        (&TOK_NAR, &["nix-archive-1", "(", "type"]),
+        (&TOK_REG, &["regular", "contents"]),
+        (&TOK_EXE, &["regular", "executable", "", "contents"]),
+        (&TOK_SYM, &["symlink", "target"]),
+        (&TOK_DIR, &["directory"]),
+        (&TOK_ENT, &["entry", "(", "name"]),
+        (&TOK_NOD, &["node", "(", "type"]),
+        (&TOK_PAR, &[")"]),
+    ];
+
+    for &(tok, xs) in cases {
+        assert_eq!(tok, token(xs)); // constant must equal the encoding of its string sequence
+    }
+}