about summary refs log tree commit diff
path: root/tvix/nix-compat/src/nar/wire/mod.rs
blob: 9e99b530ce1577afe5b6805ecb7fbb469f101edb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
//! NAR wire format, without I/O details, since those differ between
//! the synchronous and asynchronous implementations.
//!
//! The wire format is an S-expression format, encoded onto the wire
//! using simple encoding rules.
//!
//! # Encoding
//!
//! Lengths are represented as 64-bit unsigned integers in little-endian
//! format. Byte strings, including file contents and syntactic strings
//! part of the grammar, are prefixed by their 64-bit length, and padded
//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string
//! is therefore encoded as eight zero bytes representing its length.
//!
//! # Grammar
//!
//! The NAR grammar is as follows:
//! ```plain
//! archive ::= "nix-archive-1" node
//!
//! node ::= "(" "type" "symlink" "target" string ")"
//!      ||= "(" "type" "regular" ("executable" "")? "contents" string ")"
//!      ||= "(" "type" "directory" entry* ")"
//!
//! entry ::= "entry" "(" "name" string "node" node ")"
//! ```
//!
//! We rewrite it to pull together the purely syntactic elements into
//! unified tokens, producing an equivalent grammar that can be parsed
//! and serialized more elegantly:
//! ```plain
//! archive ::= TOK_NAR node
//! node ::= TOK_SYM string             TOK_PAR
//!      ||= (TOK_REG | TOK_EXE) string TOK_PAR
//!      ||= TOK_DIR entry*             TOK_PAR
//!
//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR
//!
//! TOK_NAR ::= "nix-archive-1" "(" "type"
//! TOK_SYM ::= "symlink" "target"
//! TOK_REG ::= "regular" "contents"
//! TOK_EXE ::= "regular" "executable" "" "contents"
//! TOK_DIR ::= "directory"
//! TOK_ENT ::= "entry" "(" "name"
//! TOK_NOD ::= "node" "(" "type"
//! TOK_PAR ::= ")"
//! ```
//!
//! # Restrictions
//!
//! NOTE: These restrictions are not (and cannot be) enforced by this module,
//! but must be enforced by its consumers, [super::reader] and [super::writer].
//!
//! Directory entry names cannot have the reserved names `.` and `..`, nor contain
//! forward slashes. They must appear in strictly ascending lexicographic order
//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
//!
//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length.
//!
//! Neither is permitted to be empty, or contain null bytes.

// These values are the standard Linux length limits
// NOTE(review): presumably NAME_MAX and PATH_MAX - 1 respectively — confirm.
/// Maximum length of a directory entry name, in bytes
pub const MAX_NAME_LEN: usize = 255;
/// Maximum length of a symlink target, in bytes
pub const MAX_TARGET_LEN: usize = 4095;

#[cfg(test)]
/// Test helper: builds the wire encoding of a sequence of strings.
///
/// Every string in `xs` is emitted as its length (64-bit little-endian),
/// followed by its bytes, followed by zero bytes up to the next 8-byte
/// boundary. The encodings are concatenated in order.
fn token(xs: &[&str]) -> Vec<u8> {
    let mut encoded = Vec::new();

    for payload in xs.iter().map(|s| s.as_bytes()) {
        // Length prefix, then the raw bytes.
        encoded.extend_from_slice(&(payload.len() as u64).to_le_bytes());
        encoded.extend_from_slice(payload);

        // Zero-fill up to 8-byte alignment; nothing is added when the
        // payload length is already a multiple of eight.
        let padding = (8 - payload.len() % 8) % 8;
        encoded.resize(encoded.len() + padding, 0);
    }

    encoded
}

// Pre-encoded wire tokens. Each constant is the concatenation of the listed
// strings, every one written as a 64-bit little-endian length, the string
// bytes, and zero padding up to 8-byte alignment.

/// Archive header: `"nix-archive-1" "(" "type"`
pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// Symlink node: `"symlink" "target"`
pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0";
/// Regular (non-executable) file node: `"regular" "contents"`
pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents";
/// Executable file node: `"regular" "executable" "" "contents"`
pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents";
/// Directory node: `"directory"`
pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0";
/// Start of a directory entry: `"entry" "(" "name"`
pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0";
/// Start of the node inside a directory entry: `"node" "(" "type"`
pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0";
/// Closing parenthesis: `")"`
pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";
// Eight zero bytes followed by the same bytes as TOK_PAR.
// NOTE(review): presumably matches trailing padding plus the closing
// parenthesis in the async reader — confirm against its use there.
#[cfg(feature = "async")]
const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0";

#[cfg(feature = "async")]
/// Type-level marker with no variants, so it can never be instantiated.
/// It exists to carry the `Tag` implementation whose pattern is `TOK_PAD_PAR`.
#[derive(Debug)]
pub(crate) enum PadPar {}

#[cfg(feature = "async")]
/// Binds [PadPar] to the `TOK_PAD_PAR` byte pattern.
impl crate::wire::reader::Tag for PadPar {
    /// The exact byte sequence this tag stands for.
    const PATTERN: &'static [u8] = &TOK_PAD_PAR;

    /// Fixed-size buffer with the same length as the pattern.
    type Buf = [u8; TOK_PAD_PAR.len()];

    /// Returns a zero-filled buffer of the pattern's length.
    fn make_buf() -> Self::Buf {
        [0u8; TOK_PAD_PAR.len()]
    }
}

/// Verifies each hand-encoded token constant against the encoding that
/// `token` produces for the strings it is supposed to contain.
#[test]
fn tokens() {
    /// Asserts that `tok` is exactly the wire encoding of `xs`.
    fn check(tok: &[u8], xs: &[&str]) {
        assert_eq!(tok, token(xs));
    }

    check(&TOK_NAR, &["nix-archive-1", "(", "type"]);
    check(&TOK_SYM, &["symlink", "target"]);
    check(&TOK_REG, &["regular", "contents"]);
    check(&TOK_EXE, &["regular", "executable", "", "contents"]);
    check(&TOK_DIR, &["directory"]);
    check(&TOK_ENT, &["entry", "(", "name"]);
    check(&TOK_NOD, &["node", "(", "type"]);
    check(&TOK_PAR, &[")"]);
}

pub use tag::Tag;
mod tag;

// Declares the tag enums `Node` and `Entry`, each variant bound to one of the
// fixed token constants above.
// NOTE(review): the bracketed number after each enum name is presumably the
// byte offset `tag::make!` inspects to tell the variants apart — confirm
// against the `tag` module.
tag::make! {
    // At byte offset 16 the four tokens all differ: 0x06 (TOK_SYM),
    // 0x08 (TOK_REG), 0x0a (TOK_EXE) and b'y' (TOK_DIR).
    /// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD],
    /// and preceding the next variable-length element.
    pub enum Node[16] {
        Sym = TOK_SYM,
        Reg = TOK_REG,
        Exe = TOK_EXE,
        Dir = TOK_DIR,
    }

    // At byte offset 0 the two tokens differ: 0x01 (TOK_PAR) vs 0x05 (TOK_ENT).
    /// Directory entry or terminator
    pub enum Entry[0] {
        /// End of directory
        None = TOK_PAR,
        /// Directory entry
        /// Followed by a name string, [TOK_NOD], and a [Node].
        Some = TOK_ENT,
    }
}