diff options
author | edef <edef@edef.eu> | 2023-10-27T10·54+0000 |
---|---|---|
committer | edef <edef@edef.eu> | 2023-10-27T16·08+0000 |
commit | 9253bf6632b17e68417b0cde29609cafa6225cee (patch) | |
tree | aad4aca03b81f7001157930f24430d7c7637f107 /tvix/nix-compat | |
parent | b1ad94cc9a638846de5e2c5b08dbc999b49d11b9 (diff) |
feat(tvix/nix-compat): add narinfo parsing and serialisation r/6897
Change-Id: I72c63414794642ca8d85c3f635f49db888420c40 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9852 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/nix-compat')
-rw-r--r-- | tvix/nix-compat/Cargo.toml | 6 | ||||
-rw-r--r-- | tvix/nix-compat/benches/narinfo_parse.rs | 65 | ||||
-rw-r--r-- | tvix/nix-compat/src/lib.rs | 1 | ||||
-rw-r--r-- | tvix/nix-compat/src/narinfo.rs | 406 | ||||
-rw-r--r-- | tvix/nix-compat/src/nixbase32.rs | 1 | ||||
-rw-r--r-- | tvix/nix-compat/testdata/narinfo.zst | bin | 0 -> 975945 bytes |
6 files changed, 478 insertions, 1 deletions
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml index cc6dfe5ebcdf..7a51d14e2781 100644 --- a/tvix/nix-compat/Cargo.toml +++ b/tvix/nix-compat/Cargo.toml @@ -26,6 +26,8 @@ serde_json = "1.0" test-case = "2.2.2" criterion = { version = "0.4", features = ["html_reports"] } hex-literal = "0.4.1" +pretty_assertions = "1.4.0" +zstd = "^0.9.0" [dev-dependencies.test-generator] # This fork of test-generator adds support for cargo workspaces, see @@ -36,3 +38,7 @@ rev = "82e799979980962aec1aa324ec6e0e4cad781f41" [[bench]] name = "derivation_parse_aterm" harness = false + +[[bench]] +name = "narinfo_parse" +harness = false diff --git a/tvix/nix-compat/benches/narinfo_parse.rs b/tvix/nix-compat/benches/narinfo_parse.rs new file mode 100644 index 000000000000..974d77b6d78b --- /dev/null +++ b/tvix/nix-compat/benches/narinfo_parse.rs @@ -0,0 +1,65 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; +use lazy_static::lazy_static; +use nix_compat::narinfo::NarInfo; +use std::{io, str}; + +const SAMPLE: &str = r#"StorePath: /nix/store/1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 +URL: nar/0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs.nar.xz +Compression: xz +FileHash: sha256:0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs +FileSize: 92828 +NarHash: sha256:0cfnydzp132y69bh20dj76yfd6hc3qdyblbwr9hwn59vfmnb09m0 +NarSize: 173352 +References: 03d4ncyfh76mgs6sfayl8l6zzdhm219w-python3.9-mt-940-4.23.0 0rhbw783qcjxv3cqln1760i1lmz2yb67-gsettings-desktop-schemas-41.0 1dm9ndgg56ylawpcbdzkhl03fg6777rr-python3.9-six-1.16.0 1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 2ccy5zc89zpc2aznqxgvzp4wm1bwj05n-bzip2-1.0.6.0.2-bin 32gy3pqk4n725lscdm622yzsg9np3xvs-python3.9-cryptography-36.0.0-dev 35chvqbr7vp9icdki0132fc6np09vrx5-python3.9-bleach-4.1.0 53abh5cz9zi4yh75lfzg99xqy0fdgj4i-python3.9-xmlschema-1.9.2 5p96sifyavb407mnharhyzlw6pn6km1b-glib-2.70.2-bin 6hil8z0zkqcgvaw1qwjyqa8qyaa1lm3k-python3.9-pycairo-1.20.1 803ffb21rv4af521pplb72zjm1ygm9kk-python3.9-pyparsing-2.4.7 al95l8psvmq5di3vdwa75n8w2m0sj2sy-gdk-pixbuf-2.42.6 b09371lq1jjrv43h8jpp82v23igndsn2-python3.9-fints-3.0.1 b53hk557pdk5mq4lv1zrh71a54qazbsm-python3.9-certifi-2021.10.08 bl0cwvwgch92cfsnli4dsah2gxgdickp-gtk+3-3.24.30 cfkq9wi7ypqk26c75dzic5v3nxlzyi58-python3.9-cryptography-36.0.0 cyhg57whqvrx7xf7fvn70dr5836y7zak-python3.9-sepaxml-2.4.1 d810g729g1c4lvp3nv1n3ah6cvpwg7by-cairo-1.16.0-dev dn4fwp0yx6nsa85cr20cwvdmg64xwmcy-python3-3.9.9 dzsj2n0nmq8nv6w0hvy5vb61kim3rzmd-pango-1.50.0 fs6rcnhbjvpxsyw5qiq0q7jx378fjrq7-python3.9-webencodings-0.5.1 g08sxarx191yh2dh0yk2j8icja54aksf-harfbuzz-3.1.2 glanz2lv7m6ak8pql0jcpr3izyp5cxm5-python3.9-pycparser-2.21 gpzx6h0dp5yhcvkfj68zs444ghll7dzm-python3.9-html5lib-1.1 gxyhqkpahahn4h8wbanzfhr1zkxbysid-expat-2.4.2-dev gy3pnc7bpff1h4ylhrivs4cjlvmxl0dk-python3.9-packaging-20.9 hhpqldw0552mf4mjdm2q7zqwy9hpfchd-libpng-apng-1.6.37-dev ig2bdwmplvs6dyg07fdyh006ha768jh1-python3.9-cffi-1.15.0 ij5rm5y6lmqzrwqd1zxckhbii3dg2nq5-glib-2.70.2-dev j5raylzz6fsafbgayyfaydadjl0x22s0-freetype-2.11.1-dev j6w2fbsl49jska4scyr860gz4df9biha-gobject-introspection-1.70.0 jfc99f1hrca6ih6h0n4ax431hjlx96j0-python3.9-brotli-1.0.9 kbazcxnki2qz514rl1plhsj3587hl8bb-python3.9-pysocks-1.7.1 kkljrrrj80fnz59qyfgnv6wvv0cbmpql-libhandy-1.5.0 l82il2lbp757c0smi81qmj4crlcmdz9s-python3.9-pygobject-3.42.0-dev m4zflhr10wz4frhgxqfi43rwvapki1pi-fontconfig-2.13.94-bin mbsc1c7mq15vgfzcdma9fglczih9ncfy-python3.9-chardet-4.0.0 mfvaaf4illpwrflg30cij5x4rncp9jin-python3.9-text-unidecode-1.3 msiv2nkdcaf4gvaf2cfnxcjm66j8mjxz-python3.9-elementpath-2.4.0 nmwapds8fcx22vd30d81va7a7a51ywwx-gettext-0.21 pbfraw351mksnkp2ni9c4rkc9cpp89iv-bash-5.1-p12 r8cbf18vrd54rb4psf3m4zlk5sd2jsv3-python3.9-pygobject-3.42.0 rig6npd9sd45ashf6fxcwgxzm7m4p0l3-python3.9-requests-2.26.0 ryj72ashr27gf4kh0ssgi3zpiv8fxw53-librsvg-2.52.4 s2jjq7rk5yrzlv9lyralzvpixg4p6jh3-atk-2.36.0 w1lsr2i37fr0mp1jya04nwa5nf5dxm2n-python3.9-setuptools-57.2.0 whfykra99ahs814l5hp3q5ps8rwzsf3s-python3.9-brotlicffi-1.0.9.2 wqdmghdvc4s95jgpp13fj5v3xar8mlks-python3.9-charset-normalizer-2.0.8 x1ha2nyji1px0iqknbyhdnvw4icw5h3i-python3.9-idna-3.3 z9560qb4ygbi0352m9pglwhi332cxb1f-python3.9-urllib3-1.26.7 +Deriver: 2ch8jx910qk6721mp4yqsmvdfgj5c8ir-banking-0.3.0.drv +Sig: cache.nixos.org-1:xcL67rBZPcdVZudDLpLeddkBa0KaFTw5A0udnaa0axysjrQ6Nvd9p3BLZ4rhKgl52/cKiU3c6aq60L8+IcE5Dw== +"#; + +lazy_static! { + static ref CASES: &'static [&'static str] = { + let data = + zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst"))).unwrap(); + let data = str::from_utf8(Vec::leak(data)).unwrap(); + Vec::leak( + data.split_inclusive("\n\n") + .map(|s| s.strip_suffix('\n').unwrap()) + .collect::<Vec<_>>(), + ) + }; +} + +pub fn parse(c: &mut Criterion) { + let mut g = c.benchmark_group("parse"); + + { + g.throughput(Throughput::Bytes(SAMPLE.len() as u64)); + g.bench_with_input("single", SAMPLE, |b, data| { + b.iter(|| { + black_box(NarInfo::parse(black_box(data))); + }); + }); + } + + { + for &case in *CASES { + NarInfo::parse(case).expect("should parse"); + } + + g.throughput(Throughput::Bytes( + CASES.iter().map(|s| s.len() as u64).sum(), + )); + g.bench_with_input("many", &*CASES, |b, data| { + let mut vec = vec![]; + b.iter(|| { + vec.clear(); + vec.extend(black_box(data).iter().map(|s| NarInfo::parse(s))); + black_box(&vec); + }); + }); + } + + g.finish(); +} + +criterion_group!(benches, parse); +criterion_main!(benches); diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs index 6ec60f3cc8d3..dd161cc1f944 100644 --- a/tvix/nix-compat/src/lib.rs +++ b/tvix/nix-compat/src/lib.rs @@ -1,6 +1,7 @@ pub(crate) mod aterm; pub mod derivation; pub mod nar; +pub mod narinfo; pub mod nixbase32; pub mod nixhash; pub mod store_path; diff --git a/tvix/nix-compat/src/narinfo.rs b/tvix/nix-compat/src/narinfo.rs new file mode 100644 index 000000000000..a66709abfe79 --- /dev/null +++ b/tvix/nix-compat/src/narinfo.rs @@ -0,0 +1,406 @@ +//! NAR info files describe a store path in a traditional Nix binary cache. +//! Over the wire, they are formatted as "Key: value" pairs separated by newlines. +//! +//! It contains four kinds of information: +//! 1. the description of the store path itself +//! * store path prefix, digest, and name +//! * NAR hash and size +//! * references +//! 2. authenticity information +//! * zero or more signatures over that description +//! * an optional [CAHash] for content-addressed paths (fixed outputs, sources, and derivations) +//! 3. derivation metadata +//! * deriver (the derivation that produced this path) +//! * system (the system value of that derivation) +//! 4. cache-specific information +//! * URL of the compressed NAR, relative to the NAR info file +//! * compression algorithm used for the NAR +//! * hash and size of the compressed NAR + +use data_encoding::BASE64; +use std::{ + fmt::{self, Display}, + mem, +}; + +use crate::{ + nixbase32, + nixhash::{CAHash, NixHash}, + store_path::StorePathRef, +}; + +#[derive(Debug)] +pub struct NarInfo<'a> { + // core (authenticated, but unverified here) + /// Store path described by this [NarInfo] + pub store_path: StorePathRef<'a>, + /// SHA-256 digest of the NAR file + pub nar_hash: [u8; 32], + /// Size of the NAR file in bytes + pub nar_size: u64, + /// Store paths known to be referenced by the contents + pub references: Vec<StorePathRef<'a>>, + // authenticity + /// Ed25519 signature over the path fingerprint + pub signatures: Vec<Signature<'a>>, + /// Content address (for content-defined paths) + pub ca: Option<CAHash>, + // derivation metadata + /// Nix system triple of [deriver] + pub system: Option<&'a str>, + /// Store path of the derivation that produced this + pub deriver: Option<StorePathRef<'a>>, + // cache-specific untrusted metadata + /// Relative URL of the compressed NAR file + pub url: &'a str, + /// Compression method of the NAR file + /// TODO(edef): default this to bzip2, and have None mean "none" (uncompressed) + pub compression: Option<&'a str>, + /// SHA-256 digest of the file at `url` + pub file_hash: Option<[u8; 32]>, + /// Size of the file at `url` in bytes + pub file_size: Option<u64>, +} + +impl<'a> NarInfo<'a> { + pub fn parse(input: &'a str) -> Option<Self> { + let mut store_path = None; + let mut url = None; + let mut compression = None; + let mut file_hash = None; + let mut file_size = None; + let mut nar_hash = None; + let mut nar_size = None; + let mut references = None; + let mut system = None; + let mut deriver = None; + let mut signatures = vec![]; + let mut ca = None; + + for line in input.lines() { + let (tag, val) = line.split_once(':')?; + let val = val.strip_prefix(' ')?; + + match tag { + "StorePath" => { + let val = val.strip_prefix("/nix/store/")?; + let val = StorePathRef::from_bytes(val.as_bytes()).ok()?; + + if store_path.replace(val).is_some() { + return None; + } + } + "URL" => { + if val.is_empty() { + return None; + } + + if url.replace(val).is_some() { + return None; + } + } + "Compression" => { + if val.is_empty() { + return None; + } + + if compression.replace(val).is_some() { + return None; + } + } + "FileHash" => { + let val = val.strip_prefix("sha256:")?; + let val = nixbase32::decode_fixed::<32>(val).ok()?; + + if file_hash.replace(val).is_some() { + return None; + } + } + "FileSize" => { + let val = val.parse::<u64>().ok()?; + + if file_size.replace(val).is_some() { + return None; + } + } + "NarHash" => { + let val = val.strip_prefix("sha256:")?; + let val = nixbase32::decode_fixed::<32>(val).ok()?; + + if nar_hash.replace(val).is_some() { + return None; + } + } + "NarSize" => { + let val = val.parse::<u64>().ok()?; + + if nar_size.replace(val).is_some() { + return None; + } + } + "References" => { + let val: Vec<StorePathRef> = if !val.is_empty() { + let mut prev = ""; + val.split(' ') + .map(|s| { + if mem::replace(&mut prev, s) < s { + StorePathRef::from_bytes(s.as_bytes()).ok() + } else { + // references are out of order + None + } + }) + .collect::<Option<_>>()? + } else { + vec![] + }; + + if references.replace(val).is_some() { + return None; + } + } + "System" => { + if val.is_empty() { + return None; + } + + if system.replace(val).is_some() { + return None; + } + } + "Deriver" => { + let val = StorePathRef::from_bytes(val.as_bytes()).ok()?; + + if !val.name().ends_with(".drv") { + return None; + } + + if deriver.replace(val).is_some() { + return None; + } + } + "Sig" => { + let val = Signature::parse(val)?; + + signatures.push(val); + } + "CA" => { + let val = parse_ca(val)?; + + if ca.replace(val).is_some() { + return None; + } + } + _ => { + // unknown field, ignore + } + } + } + + Some(NarInfo { + store_path: store_path?, + nar_hash: nar_hash?, + nar_size: nar_size?, + references: references?, + signatures, + ca, + system, + deriver, + url: url?, + compression, + file_hash, + file_size, + }) + } +} + +impl Display for NarInfo<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + writeln!(w, "StorePath: /nix/store/{}", self.store_path)?; + writeln!(w, "URL: {}", self.url)?; + + if let Some(compression) = self.compression { + writeln!(w, "Compression: {compression}")?; + } + + if let Some(file_hash) = self.file_hash { + writeln!(w, "FileHash: {}", fmt_hash(&NixHash::Sha256(file_hash)))?; + } + + if let Some(file_size) = self.file_size { + writeln!(w, "FileSize: {file_size}")?; + } + + writeln!(w, "NarHash: {}", fmt_hash(&NixHash::Sha256(self.nar_hash)))?; + writeln!(w, "NarSize: {}", self.nar_size)?; + + write!(w, "References:")?; + if self.references.is_empty() { + write!(w, " ")?; + } else { + for path in &self.references { + write!(w, " {path}")?; + } + } + writeln!(w)?; + + if let Some(deriver) = &self.deriver { + writeln!(w, "Deriver: {deriver}")?; + } + + if let Some(system) = self.system { + writeln!(w, "System: {system}")?; + } + + for sig in &self.signatures { + writeln!(w, "Sig: {sig}")?; + } + + if let Some(ca) = &self.ca { + writeln!(w, "CA: {}", fmt_ca(ca))?; + } + + Ok(()) + } +} + +#[derive(Debug)] +pub struct Signature<'a> { + name: &'a str, + bytes: [u8; 64], +} + +impl<'a> Signature<'a> { + pub fn parse(input: &'a str) -> Option<Signature<'a>> { + let (name, bytes64) = input.split_once(':')?; + + let mut buf = [0; 66]; + let mut bytes = [0; 64]; + match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) { + Ok(64) => { + bytes.copy_from_slice(&buf[..64]); + } + _ => { + return None; + } + } + + Some(Signature { name, bytes }) + } + + pub fn name(&self) -> &'a str { + self.name + } + + pub fn bytes(&self) -> &[u8; 64] { + &self.bytes + } +} + +impl Display for Signature<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + write!(w, "{}:{}", self.name, BASE64.encode(&self.bytes)) + } +} + +pub fn parse_ca(s: &str) -> Option<CAHash> { + let (tag, s) = s.split_once(':')?; + + match tag { + "text" => { + let digest = s.strip_prefix("sha256:")?; + let digest = nixbase32::decode_fixed(digest).ok()?; + Some(CAHash::Text(digest)) + } + "fixed" => { + if let Some(digest) = s.strip_prefix("r:sha256:") { + let digest = nixbase32::decode_fixed(digest).ok()?; + Some(CAHash::Nar(NixHash::Sha256(digest))) + } else { + parse_hash(s).map(CAHash::Flat) + } + } + _ => None, + } +} + +#[allow(non_camel_case_types)] +struct fmt_ca<'a>(&'a CAHash); + +impl Display for fmt_ca<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + CAHash::Flat(h) => { + write!(w, "fixed:{}", fmt_hash(h)) + } + &CAHash::Text(d) => { + write!(w, "text:{}", fmt_hash(&NixHash::Sha256(d))) + } + CAHash::Nar(h) => { + write!(w, "fixed:r:{}", fmt_hash(h)) + } + } + } +} + +fn parse_hash(s: &str) -> Option<NixHash> { + let (tag, digest) = s.split_once(':')?; + + (match tag { + "md5" => nixbase32::decode_fixed(digest).map(NixHash::Md5), + "sha1" => nixbase32::decode_fixed(digest).map(NixHash::Sha1), + "sha256" => nixbase32::decode_fixed(digest).map(NixHash::Sha256), + "sha512" => nixbase32::decode_fixed(digest) + .map(Box::new) + .map(NixHash::Sha512), + _ => return None, + }) + .ok() +} + +#[allow(non_camel_case_types)] +struct fmt_hash<'a>(&'a NixHash); + +impl Display for fmt_hash<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + let (tag, digest) = match self.0 { + NixHash::Md5(d) => ("md5", &d[..]), + NixHash::Sha1(d) => ("sha1", &d[..]), + NixHash::Sha256(d) => ("sha256", &d[..]), + NixHash::Sha512(d) => ("sha512", &d[..]), + }; + + write!(w, "{tag}:{}", nixbase32::encode(digest)) + } +} + +#[cfg(test)] +mod test { + use lazy_static::lazy_static; + use pretty_assertions::assert_eq; + use std::{io, str}; + + use super::NarInfo; + + lazy_static! { + static ref CASES: &'static [&'static str] = { + let data = zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst"))) + .unwrap(); + let data = str::from_utf8(Vec::leak(data)).unwrap(); + Vec::leak( + data.split_inclusive("\n\n") + .map(|s| s.strip_suffix('\n').unwrap()) + .collect::<Vec<_>>(), + ) + }; + } + + #[test] + fn roundtrip() { + for &input in *CASES { + let parsed = NarInfo::parse(input).expect("should parse"); + let output = format!("{parsed}"); + assert_eq!(input, output, "should roundtrip"); + } + } +} diff --git a/tvix/nix-compat/src/nixbase32.rs b/tvix/nix-compat/src/nixbase32.rs index 6f88d51b344a..febc6fe598ac 100644 --- a/tvix/nix-compat/src/nixbase32.rs +++ b/tvix/nix-compat/src/nixbase32.rs @@ -124,7 +124,6 @@ fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), Nixbase32DecodeEr Ok(()) } -#[cold] fn find_invalid(input: &[u8]) -> u8 { for &c in input { if !ALPHABET.contains(&c) { diff --git a/tvix/nix-compat/testdata/narinfo.zst b/tvix/nix-compat/testdata/narinfo.zst new file mode 100644 index 000000000000..361a422da86e --- /dev/null +++ b/tvix/nix-compat/testdata/narinfo.zst Binary files differ |