about summary refs log tree commit diff
path: root/tvix/nix-compat
diff options
context:
space:
mode:
authoredef <edef@edef.eu>2023-10-27T10·54+0000
committeredef <edef@edef.eu>2023-10-27T16·08+0000
commit9253bf6632b17e68417b0cde29609cafa6225cee (patch)
treeaad4aca03b81f7001157930f24430d7c7637f107 /tvix/nix-compat
parentb1ad94cc9a638846de5e2c5b08dbc999b49d11b9 (diff)
feat(tvix/nix-compat): add narinfo parsing and serialisation r/6897
Change-Id: I72c63414794642ca8d85c3f635f49db888420c40
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9852
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/nix-compat')
-rw-r--r--tvix/nix-compat/Cargo.toml6
-rw-r--r--tvix/nix-compat/benches/narinfo_parse.rs65
-rw-r--r--tvix/nix-compat/src/lib.rs1
-rw-r--r--tvix/nix-compat/src/narinfo.rs406
-rw-r--r--tvix/nix-compat/src/nixbase32.rs1
-rw-r--r--tvix/nix-compat/testdata/narinfo.zstbin0 -> 975945 bytes
6 files changed, 478 insertions, 1 deletions
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml
index cc6dfe5ebcdf..7a51d14e2781 100644
--- a/tvix/nix-compat/Cargo.toml
+++ b/tvix/nix-compat/Cargo.toml
@@ -26,6 +26,8 @@ serde_json = "1.0"
 test-case = "2.2.2"
 criterion = { version = "0.4", features = ["html_reports"] }
 hex-literal = "0.4.1"
+pretty_assertions = "1.4.0"
+zstd = "^0.9.0"
 
 [dev-dependencies.test-generator]
 # This fork of test-generator adds support for cargo workspaces, see
@@ -36,3 +38,7 @@ rev = "82e799979980962aec1aa324ec6e0e4cad781f41"
 [[bench]]
 name = "derivation_parse_aterm"
 harness = false
+
+[[bench]]
+name = "narinfo_parse"
+harness = false
diff --git a/tvix/nix-compat/benches/narinfo_parse.rs b/tvix/nix-compat/benches/narinfo_parse.rs
new file mode 100644
index 000000000000..974d77b6d78b
--- /dev/null
+++ b/tvix/nix-compat/benches/narinfo_parse.rs
@@ -0,0 +1,65 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
+use lazy_static::lazy_static;
+use nix_compat::narinfo::NarInfo;
+use std::{io, str};
+
+const SAMPLE: &str = r#"StorePath: /nix/store/1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0
+URL: nar/0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs.nar.xz
+Compression: xz
+FileHash: sha256:0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs
+FileSize: 92828
+NarHash: sha256:0cfnydzp132y69bh20dj76yfd6hc3qdyblbwr9hwn59vfmnb09m0
+NarSize: 173352
+References: 03d4ncyfh76mgs6sfayl8l6zzdhm219w-python3.9-mt-940-4.23.0 0rhbw783qcjxv3cqln1760i1lmz2yb67-gsettings-desktop-schemas-41.0 1dm9ndgg56ylawpcbdzkhl03fg6777rr-python3.9-six-1.16.0 1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 2ccy5zc89zpc2aznqxgvzp4wm1bwj05n-bzip2-1.0.6.0.2-bin 32gy3pqk4n725lscdm622yzsg9np3xvs-python3.9-cryptography-36.0.0-dev 35chvqbr7vp9icdki0132fc6np09vrx5-python3.9-bleach-4.1.0 53abh5cz9zi4yh75lfzg99xqy0fdgj4i-python3.9-xmlschema-1.9.2 5p96sifyavb407mnharhyzlw6pn6km1b-glib-2.70.2-bin 6hil8z0zkqcgvaw1qwjyqa8qyaa1lm3k-python3.9-pycairo-1.20.1 803ffb21rv4af521pplb72zjm1ygm9kk-python3.9-pyparsing-2.4.7 al95l8psvmq5di3vdwa75n8w2m0sj2sy-gdk-pixbuf-2.42.6 b09371lq1jjrv43h8jpp82v23igndsn2-python3.9-fints-3.0.1 b53hk557pdk5mq4lv1zrh71a54qazbsm-python3.9-certifi-2021.10.08 bl0cwvwgch92cfsnli4dsah2gxgdickp-gtk+3-3.24.30 cfkq9wi7ypqk26c75dzic5v3nxlzyi58-python3.9-cryptography-36.0.0 cyhg57whqvrx7xf7fvn70dr5836y7zak-python3.9-sepaxml-2.4.1 d810g729g1c4lvp3nv1n3ah6cvpwg7by-cairo-1.16.0-dev dn4fwp0yx6nsa85cr20cwvdmg64xwmcy-python3-3.9.9 dzsj2n0nmq8nv6w0hvy5vb61kim3rzmd-pango-1.50.0 fs6rcnhbjvpxsyw5qiq0q7jx378fjrq7-python3.9-webencodings-0.5.1 g08sxarx191yh2dh0yk2j8icja54aksf-harfbuzz-3.1.2 glanz2lv7m6ak8pql0jcpr3izyp5cxm5-python3.9-pycparser-2.21 gpzx6h0dp5yhcvkfj68zs444ghll7dzm-python3.9-html5lib-1.1 gxyhqkpahahn4h8wbanzfhr1zkxbysid-expat-2.4.2-dev gy3pnc7bpff1h4ylhrivs4cjlvmxl0dk-python3.9-packaging-20.9 hhpqldw0552mf4mjdm2q7zqwy9hpfchd-libpng-apng-1.6.37-dev ig2bdwmplvs6dyg07fdyh006ha768jh1-python3.9-cffi-1.15.0 ij5rm5y6lmqzrwqd1zxckhbii3dg2nq5-glib-2.70.2-dev j5raylzz6fsafbgayyfaydadjl0x22s0-freetype-2.11.1-dev j6w2fbsl49jska4scyr860gz4df9biha-gobject-introspection-1.70.0 jfc99f1hrca6ih6h0n4ax431hjlx96j0-python3.9-brotli-1.0.9 kbazcxnki2qz514rl1plhsj3587hl8bb-python3.9-pysocks-1.7.1 kkljrrrj80fnz59qyfgnv6wvv0cbmpql-libhandy-1.5.0 l82il2lbp757c0smi81qmj4crlcmdz9s-python3.9-pygobject-3.42.0-dev 
m4zflhr10wz4frhgxqfi43rwvapki1pi-fontconfig-2.13.94-bin mbsc1c7mq15vgfzcdma9fglczih9ncfy-python3.9-chardet-4.0.0 mfvaaf4illpwrflg30cij5x4rncp9jin-python3.9-text-unidecode-1.3 msiv2nkdcaf4gvaf2cfnxcjm66j8mjxz-python3.9-elementpath-2.4.0 nmwapds8fcx22vd30d81va7a7a51ywwx-gettext-0.21 pbfraw351mksnkp2ni9c4rkc9cpp89iv-bash-5.1-p12 r8cbf18vrd54rb4psf3m4zlk5sd2jsv3-python3.9-pygobject-3.42.0 rig6npd9sd45ashf6fxcwgxzm7m4p0l3-python3.9-requests-2.26.0 ryj72ashr27gf4kh0ssgi3zpiv8fxw53-librsvg-2.52.4 s2jjq7rk5yrzlv9lyralzvpixg4p6jh3-atk-2.36.0 w1lsr2i37fr0mp1jya04nwa5nf5dxm2n-python3.9-setuptools-57.2.0 whfykra99ahs814l5hp3q5ps8rwzsf3s-python3.9-brotlicffi-1.0.9.2 wqdmghdvc4s95jgpp13fj5v3xar8mlks-python3.9-charset-normalizer-2.0.8 x1ha2nyji1px0iqknbyhdnvw4icw5h3i-python3.9-idna-3.3 z9560qb4ygbi0352m9pglwhi332cxb1f-python3.9-urllib3-1.26.7
+Deriver: 2ch8jx910qk6721mp4yqsmvdfgj5c8ir-banking-0.3.0.drv
+Sig: cache.nixos.org-1:xcL67rBZPcdVZudDLpLeddkBa0KaFTw5A0udnaa0axysjrQ6Nvd9p3BLZ4rhKgl52/cKiU3c6aq60L8+IcE5Dw==
+"#;
+
+lazy_static! {
+    static ref CASES: &'static [&'static str] = {
+        let data =
+            zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst"))).unwrap();
+        let data = str::from_utf8(Vec::leak(data)).unwrap();
+        Vec::leak(
+            data.split_inclusive("\n\n")
+                .map(|s| s.strip_suffix('\n').unwrap())
+                .collect::<Vec<_>>(),
+        )
+    };
+}
+
+pub fn parse(c: &mut Criterion) {
+    let mut g = c.benchmark_group("parse");
+
+    {
+        g.throughput(Throughput::Bytes(SAMPLE.len() as u64));
+        g.bench_with_input("single", SAMPLE, |b, data| {
+            b.iter(|| {
+                black_box(NarInfo::parse(black_box(data)));
+            });
+        });
+    }
+
+    {
+        for &case in *CASES {
+            NarInfo::parse(case).expect("should parse");
+        }
+
+        g.throughput(Throughput::Bytes(
+            CASES.iter().map(|s| s.len() as u64).sum(),
+        ));
+        g.bench_with_input("many", &*CASES, |b, data| {
+            let mut vec = vec![];
+            b.iter(|| {
+                vec.clear();
+                vec.extend(black_box(data).iter().map(|s| NarInfo::parse(s)));
+                black_box(&vec);
+            });
+        });
+    }
+
+    g.finish();
+}
+
+criterion_group!(benches, parse);
+criterion_main!(benches);
diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs
index 6ec60f3cc8d3..dd161cc1f944 100644
--- a/tvix/nix-compat/src/lib.rs
+++ b/tvix/nix-compat/src/lib.rs
@@ -1,6 +1,7 @@
 pub(crate) mod aterm;
 pub mod derivation;
 pub mod nar;
+pub mod narinfo;
 pub mod nixbase32;
 pub mod nixhash;
 pub mod store_path;
diff --git a/tvix/nix-compat/src/narinfo.rs b/tvix/nix-compat/src/narinfo.rs
new file mode 100644
index 000000000000..a66709abfe79
--- /dev/null
+++ b/tvix/nix-compat/src/narinfo.rs
@@ -0,0 +1,406 @@
+//! NAR info files describe a store path in a traditional Nix binary cache.
+//! Over the wire, they are formatted as "Key: value" pairs separated by newlines.
+//!
+//! A NAR info file contains four kinds of information:
+//! 1. the description of the store path itself
+//!    * store path prefix, digest, and name
+//!    * NAR hash and size
+//!    * references
+//! 2. authenticity information
+//!    * zero or more signatures over that description
+//!    * an optional [CAHash] for content-addressed paths (fixed outputs, sources, and derivations)
+//! 3. derivation metadata
+//!    * deriver (the derivation that produced this path)
+//!    * system (the system value of that derivation)
+//! 4. cache-specific information
+//!    * URL of the compressed NAR, relative to the NAR info file
+//!    * compression algorithm used for the NAR
+//!    * hash and size of the compressed NAR
+
+use data_encoding::BASE64;
+use std::{
+    fmt::{self, Display},
+    mem,
+};
+
+use crate::{
+    nixbase32,
+    nixhash::{CAHash, NixHash},
+    store_path::StorePathRef,
+};
+
+#[derive(Debug)]
+pub struct NarInfo<'a> {
+    // core (authenticated, but unverified here)
+    /// Store path described by this [NarInfo]
+    pub store_path: StorePathRef<'a>,
+    /// SHA-256 digest of the NAR file
+    pub nar_hash: [u8; 32],
+    /// Size of the NAR file in bytes
+    pub nar_size: u64,
+    /// Store paths known to be referenced by the contents
+    pub references: Vec<StorePathRef<'a>>,
+    // authenticity
+    /// Ed25519 signatures over the path fingerprint (zero or more)
+    pub signatures: Vec<Signature<'a>>,
+    /// Content address (for content-addressed paths)
+    pub ca: Option<CAHash>,
+    // derivation metadata
+    /// Nix system triple of [deriver](NarInfo::deriver)
+    pub system: Option<&'a str>,
+    /// Store path of the derivation that produced this
+    pub deriver: Option<StorePathRef<'a>>,
+    // cache-specific untrusted metadata
+    /// Relative URL of the compressed NAR file
+    pub url: &'a str,
+    /// Compression method of the NAR file
+    /// TODO(edef): default this to bzip2, and have None mean "none" (uncompressed)
+    pub compression: Option<&'a str>,
+    /// SHA-256 digest of the file at `url`
+    pub file_hash: Option<[u8; 32]>,
+    /// Size of the file at `url` in bytes
+    pub file_size: Option<u64>,
+}
+
+impl<'a> NarInfo<'a> {
+    pub fn parse(input: &'a str) -> Option<Self> {
+        let mut store_path = None;
+        let mut url = None;
+        let mut compression = None;
+        let mut file_hash = None;
+        let mut file_size = None;
+        let mut nar_hash = None;
+        let mut nar_size = None;
+        let mut references = None;
+        let mut system = None;
+        let mut deriver = None;
+        let mut signatures = vec![];
+        let mut ca = None;
+
+        for line in input.lines() {
+            let (tag, val) = line.split_once(':')?;
+            let val = val.strip_prefix(' ')?;
+
+            match tag {
+                "StorePath" => {
+                    let val = val.strip_prefix("/nix/store/")?;
+                    let val = StorePathRef::from_bytes(val.as_bytes()).ok()?;
+
+                    if store_path.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "URL" => {
+                    if val.is_empty() {
+                        return None;
+                    }
+
+                    if url.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "Compression" => {
+                    if val.is_empty() {
+                        return None;
+                    }
+
+                    if compression.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "FileHash" => {
+                    let val = val.strip_prefix("sha256:")?;
+                    let val = nixbase32::decode_fixed::<32>(val).ok()?;
+
+                    if file_hash.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "FileSize" => {
+                    let val = val.parse::<u64>().ok()?;
+
+                    if file_size.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "NarHash" => {
+                    let val = val.strip_prefix("sha256:")?;
+                    let val = nixbase32::decode_fixed::<32>(val).ok()?;
+
+                    if nar_hash.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "NarSize" => {
+                    let val = val.parse::<u64>().ok()?;
+
+                    if nar_size.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "References" => {
+                    let val: Vec<StorePathRef> = if !val.is_empty() {
+                        let mut prev = "";
+                        val.split(' ')
+                            .map(|s| {
+                                if mem::replace(&mut prev, s) < s {
+                                    StorePathRef::from_bytes(s.as_bytes()).ok()
+                                } else {
+                                    // references are out of order
+                                    None
+                                }
+                            })
+                            .collect::<Option<_>>()?
+                    } else {
+                        vec![]
+                    };
+
+                    if references.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "System" => {
+                    if val.is_empty() {
+                        return None;
+                    }
+
+                    if system.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "Deriver" => {
+                    let val = StorePathRef::from_bytes(val.as_bytes()).ok()?;
+
+                    if !val.name().ends_with(".drv") {
+                        return None;
+                    }
+
+                    if deriver.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                "Sig" => {
+                    let val = Signature::parse(val)?;
+
+                    signatures.push(val);
+                }
+                "CA" => {
+                    let val = parse_ca(val)?;
+
+                    if ca.replace(val).is_some() {
+                        return None;
+                    }
+                }
+                _ => {
+                    // unknown field, ignore
+                }
+            }
+        }
+
+        Some(NarInfo {
+            store_path: store_path?,
+            nar_hash: nar_hash?,
+            nar_size: nar_size?,
+            references: references?,
+            signatures,
+            ca,
+            system,
+            deriver,
+            url: url?,
+            compression,
+            file_hash,
+            file_size,
+        })
+    }
+}
+
+impl Display for NarInfo<'_> {
+    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(w, "StorePath: /nix/store/{}", self.store_path)?;
+        writeln!(w, "URL: {}", self.url)?;
+
+        if let Some(compression) = self.compression {
+            writeln!(w, "Compression: {compression}")?;
+        }
+
+        if let Some(file_hash) = self.file_hash {
+            writeln!(w, "FileHash: {}", fmt_hash(&NixHash::Sha256(file_hash)))?;
+        }
+
+        if let Some(file_size) = self.file_size {
+            writeln!(w, "FileSize: {file_size}")?;
+        }
+
+        writeln!(w, "NarHash: {}", fmt_hash(&NixHash::Sha256(self.nar_hash)))?;
+        writeln!(w, "NarSize: {}", self.nar_size)?;
+
+        write!(w, "References:")?;
+        if self.references.is_empty() {
+            write!(w, " ")?;
+        } else {
+            for path in &self.references {
+                write!(w, " {path}")?;
+            }
+        }
+        writeln!(w)?;
+
+        if let Some(deriver) = &self.deriver {
+            writeln!(w, "Deriver: {deriver}")?;
+        }
+
+        if let Some(system) = self.system {
+            writeln!(w, "System: {system}")?;
+        }
+
+        for sig in &self.signatures {
+            writeln!(w, "Sig: {sig}")?;
+        }
+
+        if let Some(ca) = &self.ca {
+            writeln!(w, "CA: {}", fmt_ca(ca))?;
+        }
+
+        Ok(())
+    }
+}
+
+#[derive(Debug)]
+pub struct Signature<'a> {
+    name: &'a str,
+    bytes: [u8; 64],
+}
+
+impl<'a> Signature<'a> {
+    /// Parses a `name:base64` signature; the payload must decode to 64 bytes.
+    pub fn parse(input: &'a str) -> Option<Signature<'a>> {
+        let (name, bytes64) = input.split_once(':')?;
+
+        // decode_mut panics unless the input length matches the 66-byte buffer
+        if bytes64.len() != 88 {
+            return None;
+        }
+        let mut buf = [0; 66];
+        let mut bytes = [0; 64];
+        match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) {
+            Ok(64) => bytes.copy_from_slice(&buf[..64]),
+            _ => return None,
+        }
+        Some(Signature { name, bytes })
+    }
+
+    pub fn name(&self) -> &'a str {
+        self.name
+    }
+
+    pub fn bytes(&self) -> &[u8; 64] {
+        &self.bytes
+    }
+}
+
+impl Display for Signature<'_> {
+    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
+        write!(w, "{}:{}", self.name, BASE64.encode(&self.bytes))
+    }
+}
+
+/// Parses the value of a `CA` field into a [CAHash].
+///
+/// Accepts `text:sha256:<digest>`, `fixed:<algo>:<digest>` and `fixed:r:<algo>:<digest>`.
+pub fn parse_ca(s: &str) -> Option<CAHash> {
+    let (tag, s) = s.split_once(':')?;
+
+    match tag {
+        "text" => {
+            let digest = s.strip_prefix("sha256:")?;
+            let digest = nixbase32::decode_fixed(digest).ok()?;
+            Some(CAHash::Text(digest))
+        }
+        // "r:" selects CAHash::Nar; fmt_ca emits it for every hash algorithm
+        "fixed" => match s.strip_prefix("r:") {
+            Some(s) => parse_hash(s).map(CAHash::Nar),
+            None => parse_hash(s).map(CAHash::Flat),
+        },
+        _ => None,
+    }
+}
+
+#[allow(non_camel_case_types)]
+struct fmt_ca<'a>(&'a CAHash);
+
+impl Display for fmt_ca<'_> {
+    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
+        match self.0 {
+            CAHash::Flat(h) => {
+                write!(w, "fixed:{}", fmt_hash(h))
+            }
+            &CAHash::Text(d) => {
+                write!(w, "text:{}", fmt_hash(&NixHash::Sha256(d)))
+            }
+            CAHash::Nar(h) => {
+                write!(w, "fixed:r:{}", fmt_hash(h))
+            }
+        }
+    }
+}
+
+fn parse_hash(s: &str) -> Option<NixHash> {
+    let (tag, digest) = s.split_once(':')?;
+
+    (match tag {
+        "md5" => nixbase32::decode_fixed(digest).map(NixHash::Md5),
+        "sha1" => nixbase32::decode_fixed(digest).map(NixHash::Sha1),
+        "sha256" => nixbase32::decode_fixed(digest).map(NixHash::Sha256),
+        "sha512" => nixbase32::decode_fixed(digest)
+            .map(Box::new)
+            .map(NixHash::Sha512),
+        _ => return None,
+    })
+    .ok()
+}
+
+#[allow(non_camel_case_types)]
+struct fmt_hash<'a>(&'a NixHash);
+
+impl Display for fmt_hash<'_> {
+    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
+        let (tag, digest) = match self.0 {
+            NixHash::Md5(d) => ("md5", &d[..]),
+            NixHash::Sha1(d) => ("sha1", &d[..]),
+            NixHash::Sha256(d) => ("sha256", &d[..]),
+            NixHash::Sha512(d) => ("sha512", &d[..]),
+        };
+
+        write!(w, "{tag}:{}", nixbase32::encode(digest))
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use lazy_static::lazy_static;
+    use pretty_assertions::assert_eq;
+    use std::{io, str};
+
+    use super::NarInfo;
+
+    lazy_static! {
+        static ref CASES: &'static [&'static str] = {
+            let data = zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst")))
+                .unwrap();
+            let data = str::from_utf8(Vec::leak(data)).unwrap();
+            Vec::leak(
+                data.split_inclusive("\n\n")
+                    .map(|s| s.strip_suffix('\n').unwrap())
+                    .collect::<Vec<_>>(),
+            )
+        };
+    }
+
+    #[test]
+    fn roundtrip() {
+        for &input in *CASES {
+            let parsed = NarInfo::parse(input).expect("should parse");
+            let output = format!("{parsed}");
+            assert_eq!(input, output, "should roundtrip");
+        }
+    }
+}
diff --git a/tvix/nix-compat/src/nixbase32.rs b/tvix/nix-compat/src/nixbase32.rs
index 6f88d51b344a..febc6fe598ac 100644
--- a/tvix/nix-compat/src/nixbase32.rs
+++ b/tvix/nix-compat/src/nixbase32.rs
@@ -124,7 +124,6 @@ fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), Nixbase32DecodeEr
     Ok(())
 }
 
-#[cold]
 fn find_invalid(input: &[u8]) -> u8 {
     for &c in input {
         if !ALPHABET.contains(&c) {
diff --git a/tvix/nix-compat/testdata/narinfo.zst b/tvix/nix-compat/testdata/narinfo.zst
new file mode 100644
index 000000000000..361a422da86e
--- /dev/null
+++ b/tvix/nix-compat/testdata/narinfo.zst
Binary files differ