about summary refs log tree commit diff
path: root/tvix/nix-compat/src/nar
diff options
context:
space:
mode:
authoredef <edef@edef.eu>2023-10-17T22·29+0000
committeredef <edef@edef.eu>2023-10-18T11·40+0000
commit08b98b7503edbde5277ce11faf48b94ca2f1c0b7 (patch)
treea75705dbd33c8a265d47b7a8c9b4926aacab5d8d /tvix/nix-compat/src/nar
parent57b69baf7c8891f0032ef70415482f9a38396bb7 (diff)
docs(tvix/nix-compat/nar): document the wire format r/6852
Change-Id: I6c8e23bad27fa6ada1b8973482b4d99190cf050d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9767
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Diffstat (limited to 'tvix/nix-compat/src/nar')
-rw-r--r--tvix/nix-compat/src/nar/writer/wire.rs68
1 files changed, 66 insertions, 2 deletions
diff --git a/tvix/nix-compat/src/nar/writer/wire.rs b/tvix/nix-compat/src/nar/writer/wire.rs
index 98581ae3aa7c..a6c19f0759c3 100644
--- a/tvix/nix-compat/src/nar/writer/wire.rs
+++ b/tvix/nix-compat/src/nar/writer/wire.rs
@@ -1,4 +1,68 @@
+//! NAR wire format, without I/O details, since those differ between
+//! the synchronous and asynchronous implementations.
+//!
+//! The wire format is an S-expression format, encoded onto the wire
+//! using simple encoding rules.
+//!
+//! # Encoding
+//!
+//! Lengths are represented as 64-bit unsigned integers in little-endian
+//! format. Byte strings, including file contents and syntactic strings
+//! part of the grammar, are prefixed by their 64-bit length, and padded
+//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string
+//! is therefore encoded as eight zero bytes representing its length.
+//!
+//! # Grammar
+//!
+//! The NAR grammar is as follows:
+//! ```plain
+//! archive ::= "nix-archive-1" node
+//!
+//! node ::= "(" "type" "symlink" "target" string ")"
+//!      ||= "(" "type" "regular" ("executable" "")? "contents" string ")"
+//!      ||= "(" "type" "directory" entry* ")"
+//!
+//! entry ::= "entry" "(" "name" string "node" node ")"
+//! ```
+//!
+//! We rewrite it to pull together the purely syntactic elements into
+//! unified tokens, producing an equivalent grammar that can be parsed
+//! and serialized more elegantly:
+//! ```plain
+//! archive ::= TOK_NAR node
+//! node ::= TOK_SYM string             TOK_PAR
+//!      ||= (TOK_REG | TOK_EXE) string TOK_PAR
+//!      ||= TOK_DIR entry*             TOK_PAR
+//!
+//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR
+//!
+//! TOK_NAR ::= "nix-archive-1" "(" "type"
+//! TOK_SYM ::= "symlink" "target"
+//! TOK_REG ::= "regular" "contents"
+//! TOK_EXE ::= "regular" "executable" ""
+//! TOK_DIR ::= "directory"
+//! TOK_ENT ::= "entry" "(" "name"
+//! TOK_NOD ::= "node" "(" "type"
+//! TOK_PAR ::= ")"
+//! ```
+//!
+//! # Restrictions
+//!
+//! NOTE: These restrictions are not (and cannot be) enforced by this module,
+//! but must be enforced by its consumers, [super::reader] and [super::writer].
+//!
+//! Directory entry names cannot have the reserved names `.` and `..`, nor contain
+//! forward slashes. They must appear in strictly ascending lexicographic order
+//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
+//!
+//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length.
+//!
+//! Neither is permitted to be empty, or contain null bytes.
+
+// These values are the standard Linux length limits
+/// Maximum length of a directory entry name
 pub const MAX_NAME_LEN: usize = 255;
+/// Maximum length of a symlink target
 pub const MAX_TARGET_LEN: usize = 4095;
 
 #[cfg(test)]
@@ -19,9 +83,9 @@ fn token(xs: &[&str]) -> Vec<u8> {
 }
 
 pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
+pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
 pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents";
 pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents";
-pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
 pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
 pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
 pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
@@ -31,9 +95,9 @@ pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
 fn tokens() {
     let cases: &[(&[u8], &[&str])] = &[
         (&TOK_NAR, &["nix-archive-1", "(", "type"]),
+        (&TOK_SYM, &["symlink", "target"]),
         (&TOK_REG, &["regular", "contents"]),
         (&TOK_EXE, &["regular", "executable", "", "contents"]),
-        (&TOK_SYM, &["symlink", "target"]),
         (&TOK_DIR, &["directory"]),
         (&TOK_ENT, &["entry", "(", "name"]),
         (&TOK_NOD, &["node", "(", "type"]),