diff options
Diffstat (limited to 'tvix/nix-compat')
74 files changed, 10482 insertions, 0 deletions
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml new file mode 100644 index 0000000000..91fd19475a --- /dev/null +++ b/tvix/nix-compat/Cargo.toml @@ -0,0 +1,56 @@ +[package] +name = "nix-compat" +version = "0.1.0" +edition = "2021" + +[features] +# async NAR writer. Also needs the `wire` feature. +async = ["tokio"] +# code emitting low-level packets used in the daemon protocol. +wire = ["tokio", "pin-project-lite"] + +# Enable all features by default. +default = ["async", "wire"] + +[dependencies] +bitflags = "2.4.1" +bstr = { version = "1.6.0", features = ["alloc", "unicode", "serde"] } +data-encoding = "2.3.3" +ed25519 = "2.2.3" +ed25519-dalek = "2.1.0" +enum-primitive-derive = "0.3.0" +glob = "0.3.0" +nom = "7.1.3" +num-traits = "0.2.18" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10.6" +thiserror = "1.0.38" + +[dependencies.tokio] +optional = true +version = "1.32.0" +features = ["io-util", "macros"] + +[dependencies.pin-project-lite] +optional = true +version = "0.2.13" + +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } +futures = { version = "0.3.30", default-features = false, features = ["executor"] } +hex-literal = "0.4.1" +lazy_static = "1.4.0" +pretty_assertions = "1.4.0" +rstest = "0.19.0" +serde_json = "1.0" +tokio-test = "0.4.3" +zstd = "^0.13.0" + +[[bench]] +name = "derivation_parse_aterm" +harness = false + +[[bench]] +name = "narinfo_parse" +harness = false diff --git a/tvix/nix-compat/benches/derivation_parse_aterm.rs b/tvix/nix-compat/benches/derivation_parse_aterm.rs new file mode 100644 index 0000000000..4ace7d4480 --- /dev/null +++ b/tvix/nix-compat/benches/derivation_parse_aterm.rs @@ -0,0 +1,31 @@ +use std::path::Path; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use nix_compat::derivation::Derivation; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests/ok"; + +fn bench_aterm_parser(c: &mut Criterion) 
{ + for drv in [ + "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", + "292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv", + "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", + "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv", + "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv", + "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv", + "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv", + "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv", + "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", + "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv", + ] { + let drv_path = Path::new(RESOURCES_PATHS).join(drv); + let drv_bytes = &std::fs::read(drv_path).unwrap(); + + c.bench_function(drv, |b| { + b.iter(|| Derivation::from_aterm_bytes(black_box(drv_bytes))) + }); + } +} + +criterion_group!(benches, bench_aterm_parser); +criterion_main!(benches); diff --git a/tvix/nix-compat/benches/narinfo_parse.rs b/tvix/nix-compat/benches/narinfo_parse.rs new file mode 100644 index 0000000000..7ffd24d12b --- /dev/null +++ b/tvix/nix-compat/benches/narinfo_parse.rs @@ -0,0 +1,69 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; +use lazy_static::lazy_static; +use nix_compat::narinfo::NarInfo; +use std::{io, str}; + +const SAMPLE: &str = r#"StorePath: /nix/store/1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 +URL: nar/0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs.nar.xz +Compression: xz +FileHash: sha256:0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs +FileSize: 92828 +NarHash: sha256:0cfnydzp132y69bh20dj76yfd6hc3qdyblbwr9hwn59vfmnb09m0 +NarSize: 173352 +References: 03d4ncyfh76mgs6sfayl8l6zzdhm219w-python3.9-mt-940-4.23.0 0rhbw783qcjxv3cqln1760i1lmz2yb67-gsettings-desktop-schemas-41.0 1dm9ndgg56ylawpcbdzkhl03fg6777rr-python3.9-six-1.16.0 1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 2ccy5zc89zpc2aznqxgvzp4wm1bwj05n-bzip2-1.0.6.0.2-bin 32gy3pqk4n725lscdm622yzsg9np3xvs-python3.9-cryptography-36.0.0-dev 35chvqbr7vp9icdki0132fc6np09vrx5-python3.9-bleach-4.1.0 
53abh5cz9zi4yh75lfzg99xqy0fdgj4i-python3.9-xmlschema-1.9.2 5p96sifyavb407mnharhyzlw6pn6km1b-glib-2.70.2-bin 6hil8z0zkqcgvaw1qwjyqa8qyaa1lm3k-python3.9-pycairo-1.20.1 803ffb21rv4af521pplb72zjm1ygm9kk-python3.9-pyparsing-2.4.7 al95l8psvmq5di3vdwa75n8w2m0sj2sy-gdk-pixbuf-2.42.6 b09371lq1jjrv43h8jpp82v23igndsn2-python3.9-fints-3.0.1 b53hk557pdk5mq4lv1zrh71a54qazbsm-python3.9-certifi-2021.10.08 bl0cwvwgch92cfsnli4dsah2gxgdickp-gtk+3-3.24.30 cfkq9wi7ypqk26c75dzic5v3nxlzyi58-python3.9-cryptography-36.0.0 cyhg57whqvrx7xf7fvn70dr5836y7zak-python3.9-sepaxml-2.4.1 d810g729g1c4lvp3nv1n3ah6cvpwg7by-cairo-1.16.0-dev dn4fwp0yx6nsa85cr20cwvdmg64xwmcy-python3-3.9.9 dzsj2n0nmq8nv6w0hvy5vb61kim3rzmd-pango-1.50.0 fs6rcnhbjvpxsyw5qiq0q7jx378fjrq7-python3.9-webencodings-0.5.1 g08sxarx191yh2dh0yk2j8icja54aksf-harfbuzz-3.1.2 glanz2lv7m6ak8pql0jcpr3izyp5cxm5-python3.9-pycparser-2.21 gpzx6h0dp5yhcvkfj68zs444ghll7dzm-python3.9-html5lib-1.1 gxyhqkpahahn4h8wbanzfhr1zkxbysid-expat-2.4.2-dev gy3pnc7bpff1h4ylhrivs4cjlvmxl0dk-python3.9-packaging-20.9 hhpqldw0552mf4mjdm2q7zqwy9hpfchd-libpng-apng-1.6.37-dev ig2bdwmplvs6dyg07fdyh006ha768jh1-python3.9-cffi-1.15.0 ij5rm5y6lmqzrwqd1zxckhbii3dg2nq5-glib-2.70.2-dev j5raylzz6fsafbgayyfaydadjl0x22s0-freetype-2.11.1-dev j6w2fbsl49jska4scyr860gz4df9biha-gobject-introspection-1.70.0 jfc99f1hrca6ih6h0n4ax431hjlx96j0-python3.9-brotli-1.0.9 kbazcxnki2qz514rl1plhsj3587hl8bb-python3.9-pysocks-1.7.1 kkljrrrj80fnz59qyfgnv6wvv0cbmpql-libhandy-1.5.0 l82il2lbp757c0smi81qmj4crlcmdz9s-python3.9-pygobject-3.42.0-dev m4zflhr10wz4frhgxqfi43rwvapki1pi-fontconfig-2.13.94-bin mbsc1c7mq15vgfzcdma9fglczih9ncfy-python3.9-chardet-4.0.0 mfvaaf4illpwrflg30cij5x4rncp9jin-python3.9-text-unidecode-1.3 msiv2nkdcaf4gvaf2cfnxcjm66j8mjxz-python3.9-elementpath-2.4.0 nmwapds8fcx22vd30d81va7a7a51ywwx-gettext-0.21 pbfraw351mksnkp2ni9c4rkc9cpp89iv-bash-5.1-p12 r8cbf18vrd54rb4psf3m4zlk5sd2jsv3-python3.9-pygobject-3.42.0 rig6npd9sd45ashf6fxcwgxzm7m4p0l3-python3.9-requests-2.26.0 
ryj72ashr27gf4kh0ssgi3zpiv8fxw53-librsvg-2.52.4 s2jjq7rk5yrzlv9lyralzvpixg4p6jh3-atk-2.36.0 w1lsr2i37fr0mp1jya04nwa5nf5dxm2n-python3.9-setuptools-57.2.0 whfykra99ahs814l5hp3q5ps8rwzsf3s-python3.9-brotlicffi-1.0.9.2 wqdmghdvc4s95jgpp13fj5v3xar8mlks-python3.9-charset-normalizer-2.0.8 x1ha2nyji1px0iqknbyhdnvw4icw5h3i-python3.9-idna-3.3 z9560qb4ygbi0352m9pglwhi332cxb1f-python3.9-urllib3-1.26.7 +Deriver: 2ch8jx910qk6721mp4yqsmvdfgj5c8ir-banking-0.3.0.drv +Sig: cache.nixos.org-1:xcL67rBZPcdVZudDLpLeddkBa0KaFTw5A0udnaa0axysjrQ6Nvd9p3BLZ4rhKgl52/cKiU3c6aq60L8+IcE5Dw== +"#; + +lazy_static! { + static ref CASES: &'static [&'static str] = { + let data = + zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst"))).unwrap(); + let data = str::from_utf8(Vec::leak(data)).unwrap(); + Vec::leak( + data.split_inclusive("\n\n") + .map(|s| s.strip_suffix('\n').unwrap()) + .collect::<Vec<_>>(), + ) + }; +} + +pub fn parse(c: &mut Criterion) { + let mut g = c.benchmark_group("parse"); + + { + g.throughput(Throughput::Bytes(SAMPLE.len() as u64)); + g.bench_with_input("single", SAMPLE, |b, data| { + b.iter(|| { + black_box(NarInfo::parse(black_box(data)).ok().unwrap()); + }); + }); + } + + { + for &case in *CASES { + NarInfo::parse(case).expect("should parse"); + } + + g.throughput(Throughput::Bytes( + CASES.iter().map(|s| s.len() as u64).sum(), + )); + g.bench_with_input("many", &*CASES, |b, data| { + let mut vec = vec![]; + b.iter(|| { + vec.clear(); + vec.extend( + black_box(data) + .iter() + .map(|s| NarInfo::parse(s).ok().unwrap()), + ); + black_box(&vec); + }); + }); + } + + g.finish(); +} + +criterion_group!(benches, parse); +criterion_main!(benches); diff --git a/tvix/nix-compat/default.nix b/tvix/nix-compat/default.nix new file mode 100644 index 0000000000..9df76e12fc --- /dev/null +++ b/tvix/nix-compat/default.nix @@ -0,0 +1,7 @@ +{ depot, ... 
}: + +depot.tvix.crates.workspaceMembers.nix-compat.build.override { + runTests = true; + # make sure we also enable async here, so run the tests behind that feature flag. + features = [ "default" "async" "wire" ]; +} diff --git a/tvix/nix-compat/src/aterm/escape.rs b/tvix/nix-compat/src/aterm/escape.rs new file mode 100644 index 0000000000..80a85d2103 --- /dev/null +++ b/tvix/nix-compat/src/aterm/escape.rs @@ -0,0 +1,28 @@ +use bstr::ByteSlice; + +/// Escapes a byte sequence. Does not add surrounding quotes. +pub fn escape_bytes<P: AsRef<[u8]>>(s: P) -> Vec<u8> { + let mut s: Vec<u8> = s.as_ref().to_vec(); + + s = s.replace(b"\\", b"\\\\"); + s = s.replace(b"\n", b"\\n"); + s = s.replace(b"\r", b"\\r"); + s = s.replace(b"\t", b"\\t"); + s = s.replace(b"\"", b"\\\""); + + s +} + +#[cfg(test)] +mod tests { + use super::escape_bytes; + use rstest::rstest; + + #[rstest] + #[case::empty(b"", b"")] + #[case::doublequote(b"\"", b"\\\"")] + #[case::colon(b":", b":")] + fn escape(#[case] input: &[u8], #[case] expected: &[u8]) { + assert_eq!(expected, escape_bytes(input)) + } +} diff --git a/tvix/nix-compat/src/aterm/mod.rs b/tvix/nix-compat/src/aterm/mod.rs new file mode 100644 index 0000000000..8806b6caf2 --- /dev/null +++ b/tvix/nix-compat/src/aterm/mod.rs @@ -0,0 +1,7 @@ +mod escape; +mod parser; + +pub(crate) use escape::escape_bytes; +pub(crate) use parser::parse_bstr_field; +pub(crate) use parser::parse_str_list; +pub(crate) use parser::parse_string_field; diff --git a/tvix/nix-compat/src/aterm/parser.rs b/tvix/nix-compat/src/aterm/parser.rs new file mode 100644 index 0000000000..a30cb40ab0 --- /dev/null +++ b/tvix/nix-compat/src/aterm/parser.rs @@ -0,0 +1,125 @@ +//! This module implements parsing code for some basic building blocks +//! of the [ATerm][] format, which is used by C++ Nix to serialize Derivations. +//! +//! 
[ATerm]: http://program-transformation.org/Tools/ATermFormat.html +use bstr::BString; +use nom::branch::alt; +use nom::bytes::complete::{escaped_transform, is_not, tag}; +use nom::character::complete::char as nomchar; +use nom::combinator::{map, value}; +use nom::multi::separated_list0; +use nom::sequence::delimited; +use nom::IResult; + +/// Parse a bstr and undo any escaping. +/// +/// The recognized escape sequences are `\\`, `\n`, `\t`, `\r` and `\"`. +fn parse_escaped_bstr(i: &[u8]) -> IResult<&[u8], BString> { + escaped_transform( + is_not("\"\\"), + '\\', + alt(( + value("\\".as_bytes(), nomchar('\\')), + value("\n".as_bytes(), nomchar('n')), + value("\t".as_bytes(), nomchar('t')), + value("\r".as_bytes(), nomchar('r')), + value("\"".as_bytes(), nomchar('\"')), + )), + )(i) + .map(|(i, v)| (i, BString::new(v))) +} + +/// Parse a field in double quotes, undo any escaping, and return the unquoted +/// and decoded [BString]. +pub(crate) fn parse_bstr_field(i: &[u8]) -> IResult<&[u8], BString> { + // inside double quotes… + delimited( + nomchar('\"'), + // …there is + alt(( + // …either a bstr after unescaping + parse_escaped_bstr, + // …or an empty string. + map(tag(b""), |_| BString::default()), + )), + nomchar('\"'), + )(i) +} + +/// Parse a field in double quotes, undo any escaping, and return the unquoted +/// and decoded string, if it's a valid string. Parsing fails if the bytes are +/// not valid UTF-8. +pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> { + // inside double quotes… + delimited( + nomchar('\"'), + // …there is + alt(( + // …either a String after unescaping (rejected if not valid UTF-8) + nom::combinator::map_opt(parse_escaped_bstr, |escaped_bstr| { + String::from_utf8(escaped_bstr.into()).ok() + }), + // …or an empty string. 
+ map(tag(b""), |_| String::new()), + )), + nomchar('\"'), + )(i) +} + +/// Parse a list of string fields (enclosed in brackets, separated by commas). +pub(crate) fn parse_str_list(i: &[u8]) -> IResult<&[u8], Vec<String>> { + // inside brackets + delimited( + nomchar('['), + separated_list0(nomchar(','), parse_string_field), + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use rstest::rstest; + + #[rstest] + #[case::empty(br#""""#, b"", b"")] + #[case::hello_world(br#""Hello World""#, b"Hello World", b"")] + #[case::doublequote(br#""\"""#, br#"""#, b"")] + #[case::colon(br#"":""#, b":", b"")] + #[case::doublequote_rest(br#""\""Rest"#, br#"""#, b"Rest")] + fn test_parse_bstr_field( + #[case] input: &[u8], + #[case] expected: &[u8], + #[case] exp_rest: &[u8], + ) { + let (rest, parsed) = super::parse_bstr_field(input).expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, parsed); + } + + #[rstest] + #[case::empty(br#""""#, "", b"")] + #[case::hello_world(br#""Hello World""#, "Hello World", b"")] + #[case::doublequote(br#""\"""#, r#"""#, b"")] + #[case::colon(br#"":""#, ":", b"")] + #[case::doublequote_rest(br#""\""Rest"#, r#"""#, b"Rest")] + fn parse_string_field(#[case] input: &[u8], #[case] expected: &str, #[case] exp_rest: &[u8]) { + let (rest, parsed) = super::parse_string_field(input).expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, &parsed); + } + + #[test] + fn parse_string_field_invalid_encoding_fail() { + let input: Vec<_> = vec![b'"', 0xc5, 0xc4, 0xd6, b'"']; + + super::parse_string_field(&input).expect_err("must fail"); + } + + #[rstest] + #[case::single_foo(br#"["foo"]"#, vec!["foo".to_string()], b"")] + #[case::empty_list(b"[]", vec![], b"")] + #[case::empty_list_with_rest(b"[]blub", vec![], b"blub")] + fn parse_list(#[case] input: &[u8], #[case] expected: Vec<String>, #[case] exp_rest: &[u8]) { + let (rest, parsed) = super::parse_str_list(input).expect("must parse"); + 
assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, parsed); + } +} diff --git a/tvix/nix-compat/src/bin/drvfmt.rs b/tvix/nix-compat/src/bin/drvfmt.rs new file mode 100644 index 0000000000..ddc1f0389f --- /dev/null +++ b/tvix/nix-compat/src/bin/drvfmt.rs @@ -0,0 +1,42 @@ +use std::{collections::BTreeMap, io::Read}; + +use nix_compat::derivation::Derivation; +use serde_json::json; + +/// construct a serde_json::Value from a Derivation. +/// Some environment values can be non-valid UTF-8 strings. +/// `serde_json` prints them out really unreadable. +/// This is a tool to print A-Terms in a more readable fashion, so we brutally +/// use the [std::string::ToString] implementation of [bstr::BString] to get +/// a UTF-8 string (replacing invalid characters with the Unicode replacement +/// codepoint). +fn build_serde_json_value(drv: Derivation) -> serde_json::Value { + json!({ + "args": drv.arguments, + "builder": drv.builder, + "env": drv.environment.into_iter().map(|(k,v)| (k, v.to_string())).collect::<BTreeMap<String, String>>(), + "inputDrvs": drv.input_derivations, + "inputSrcs": drv.input_sources, + "outputs": drv.outputs, + "system": drv.system, + }) +} + +fn main() { + // read A-Term from stdin + let mut buf = Vec::new(); + std::io::stdin() + .read_to_end(&mut buf) + .expect("failed to read from stdin"); + + match Derivation::from_aterm_bytes(&buf) { + Ok(drv) => { + println!( + "{}", + serde_json::to_string_pretty(&build_serde_json_value(drv)) + .expect("unable to serialize") + ); + } + Err(e) => eprintln!("unable to parse derivation: {:#?}", e), + } +} diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs new file mode 100644 index 0000000000..452231f19d --- /dev/null +++ b/tvix/nix-compat/src/derivation/errors.rs @@ -0,0 +1,60 @@ +//! 
Contains [DerivationError], exported as [crate::derivation::DerivationError] +use crate::store_path; +use thiserror::Error; + +use super::CAHash; + +/// Errors that can occur during the validation of Derivation structs. +#[derive(Debug, Error, PartialEq)] +pub enum DerivationError { + // outputs + #[error("no outputs defined")] + NoOutputs(), + #[error("invalid output name: {0}")] + InvalidOutputName(String), + #[error("encountered fixed-output derivation, but more than 1 output in total")] + MoreThanOneOutputButFixed(), + #[error("invalid output name for fixed-output derivation: {0}")] + InvalidOutputNameForFixed(String), + #[error("unable to validate output {0}: {1}")] + InvalidOutput(String, OutputError), + #[error("unable to validate output {0}: {1}")] + InvalidOutputDerivationPath(String, store_path::BuildStorePathError), + // input derivation + #[error("unable to parse input derivation path {0}: {1}")] + InvalidInputDerivationPath(String, store_path::Error), + // NOTE: despite the variant name, the message reports a missing `.drv` *suffix*. + #[error("input derivation {0} doesn't end with .drv")] + InvalidInputDerivationPrefix(String), + #[error("input derivation {0} output names are empty")] + EmptyInputDerivationOutputNames(String), + #[error("input derivation {0} output name {1} is invalid")] + InvalidInputDerivationOutputName(String, String), + + // input sources + #[error("unable to parse input sources path {0}: {1}")] + InvalidInputSourcesPath(String, store_path::Error), + + // platform + #[error("invalid platform field: {0}")] + InvalidPlatform(String), + + // builder + #[error("invalid builder field: {0}")] + InvalidBuilder(String), + + // environment + #[error("invalid environment key {0}")] + InvalidEnvironmentKey(String), +} + +/// Errors that can occur during the validation of a specific +/// [crate::derivation::Output] of a [crate::derivation::Derivation]. 
+#[derive(Debug, Error, PartialEq)] +pub enum OutputError { + #[error("Invalid output path {0}: {1}")] + InvalidOutputPath(String, store_path::Error), + #[error("Missing output path")] + MissingOutputPath, + #[error("Invalid CAHash: {:?}", .0)] + InvalidCAHash(CAHash), +} diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs new file mode 100644 index 0000000000..6e12e3ea86 --- /dev/null +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -0,0 +1,305 @@ +use crate::store_path::{ + self, build_ca_path, build_output_path, build_text_path, StorePath, StorePathRef, +}; +use bstr::BString; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, BTreeSet}; +use std::io; + +mod errors; +mod output; +mod parse_error; +mod parser; +mod validate; +mod write; + +#[cfg(test)] +mod tests; + +// Public API of the crate. +pub use crate::nixhash::{CAHash, NixHash}; +pub use errors::{DerivationError, OutputError}; +pub use output::Output; + +use self::write::AtermWriteable; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct Derivation { + #[serde(rename = "args")] + pub arguments: Vec<String>, + + pub builder: String, + + #[serde(rename = "env")] + pub environment: BTreeMap<String, BString>, + + /// Map from drv path to output names used from this derivation. + #[serde(rename = "inputDrvs")] + pub input_derivations: BTreeMap<StorePath, BTreeSet<String>>, + + /// Plain store paths of additional inputs. + #[serde(rename = "inputSrcs")] + pub input_sources: BTreeSet<StorePath>, + + /// Maps output names to Output. + pub outputs: BTreeMap<String, Output>, + + pub system: String, +} + +impl Derivation { + /// Write the Derivation to the given [std::io::Write], in ATerm format. + /// + /// The only errors returned are those from writing to the passed writer. 
+ pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), io::Error> { + self.serialize_with_replacements(writer, &self.input_derivations) + } + + /// Like `serialize`, but allows replacing the input_derivations for hash calculations. + fn serialize_with_replacements( + &self, + writer: &mut impl std::io::Write, + input_derivations: &BTreeMap<impl AtermWriteable, BTreeSet<String>>, + ) -> Result<(), io::Error> { + use write::*; + + writer.write_all(write::DERIVATION_PREFIX.as_bytes())?; + write_char(writer, write::PAREN_OPEN)?; + + write_outputs(writer, &self.outputs)?; + write_char(writer, COMMA)?; + + write_input_derivations(writer, input_derivations)?; + write_char(writer, COMMA)?; + + write_input_sources(writer, &self.input_sources)?; + write_char(writer, COMMA)?; + + write_system(writer, &self.system)?; + write_char(writer, COMMA)?; + + write_builder(writer, &self.builder)?; + write_char(writer, COMMA)?; + + write_arguments(writer, &self.arguments)?; + write_char(writer, COMMA)?; + + write_environment(writer, &self.environment)?; + + write_char(writer, PAREN_CLOSE)?; + + Ok(()) + } + + /// Return the ATerm serialization as bytes. + pub fn to_aterm_bytes(&self) -> Vec<u8> { + self.to_aterm_bytes_with_replacements(&self.input_derivations) + } + + /// Like `to_aterm_bytes`, but accept a different BTreeMap for input_derivations. + /// This is used to render the ATerm representation of a Derivation "modulo + /// fixed-output derivations". + fn to_aterm_bytes_with_replacements( + &self, + input_derivations: &BTreeMap<impl AtermWriteable, BTreeSet<String>>, + ) -> Vec<u8> { + let mut buffer: Vec<u8> = Vec::new(); + + // invoke serialize and write to the buffer. + // serialize only propagates errors from the writer, and writing to an + // in-memory Vec<u8> does not fail, so the unwrap below cannot panic. 
+ self.serialize_with_replacements(&mut buffer, input_derivations) + .unwrap(); + + buffer + } + + /// Parse a Derivation in ATerm serialization, and validate it passes our + /// set of validations. + pub fn from_aterm_bytes(b: &[u8]) -> Result<Derivation, parser::Error<&[u8]>> { + parser::parse(b) + } + + /// Returns the drv path of a [Derivation] struct. + /// + /// The drv path is calculated by invoking [build_text_path], using + /// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and + /// keys of [Derivation::input_derivations] as references, and the ATerm string of + /// the [Derivation] as content. + /// + /// # Errors + /// + /// Any error from [build_text_path] is reported as + /// [DerivationError::InvalidOutputName], carrying the `.drv`-suffixed name. + pub fn calculate_derivation_path(&self, name: &str) -> Result<StorePath, DerivationError> { + // append .drv to the name + let name = &format!("{}.drv", name); + + // collect the list of paths from input_sources and input_derivations + // into a (sorted, guaranteed by BTreeSet) list of references + let references: BTreeSet<String> = self + .input_sources + .iter() + .chain(self.input_derivations.keys()) + .map(StorePath::to_absolute_path) + .collect(); + + build_text_path(name, self.to_aterm_bytes(), references) + .map(|s| s.to_owned()) + .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) + } + + /// Returns the FOD digest, if the derivation is fixed-output, or None if + /// it's not. 
+ /// TODO: this is kinda the string from [build_ca_path] with a + /// [CAHash::Flat], what's fed to `build_store_path_from_fingerprint_parts` + /// (except the out_output.path being an empty string) + pub fn fod_digest(&self) -> Option<[u8; 32]> { + if self.outputs.len() != 1 { + return None; + } + + let out_output = self.outputs.get("out")?; + let ca_hash = &out_output.ca_hash.as_ref()?; + + Some( + Sha256::new_with_prefix(format!( + "fixed:out:{}{}:{}", + ca_kind_prefix(ca_hash), + ca_hash.hash().to_nix_hex_string(), + out_output + .path + .as_ref() + .map(StorePath::to_absolute_path) + .as_ref() + .map(|s| s as &str) + .unwrap_or(""), + )) + .finalize() + .into(), + ) + } + + /// Calculates the hash of a derivation modulo fixed-output subderivations. + /// + /// This is called `hashDerivationModulo` in nixcpp. + /// + /// It returns the sha256 digest of the derivation ATerm representation, + /// except that: + /// - any input derivation paths have been replaced "by the result of a + /// recursive call to this function" and that + /// - for fixed-output derivations the special + /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of + /// the A-Term. + /// + /// It's up to the caller of this function to provide a (infallible) lookup + /// function to query [hash_derivation_modulo] of direct input derivations, + /// by their [StorePathRef]. + /// It will only be called in case the derivation is not a fixed-output + /// derivation. + pub fn hash_derivation_modulo<F>(&self, fn_lookup_hash_derivation_modulo: F) -> [u8; 32] + where + F: Fn(&StorePathRef) -> [u8; 32], + { + // Fixed-output derivations return a fixed hash. + // Non-Fixed-output derivations return the sha256 digest of the ATerm + // notation, but with all input_derivation paths replaced by a recursive + // call to this function. + // We call [fn_lookup_hash_derivation_modulo] rather than recursing + // ourselves, so callers can precompute this. 
+ // + // The fallback is a closure (unwrap_or_else, not unwrap_or), so the + // lookups and the ATerm rendering only happen for non-fixed-output + // derivations, as promised in the doc comment above. + self.fod_digest().unwrap_or_else(|| { + // For each input_derivation, look up the hash derivation modulo, + // and replace the derivation path in the aterm with its HEXLOWER digest. + let aterm_bytes = self.to_aterm_bytes_with_replacements(&BTreeMap::from_iter( + self.input_derivations + .iter() + .map(|(drv_path, output_names)| { + let hash = fn_lookup_hash_derivation_modulo(&drv_path.into()); + + (hash, output_names.to_owned()) + }), + )); + + // write the ATerm of that to the hash function and return its digest. + Sha256::new_with_prefix(aterm_bytes).finalize().into() + }) + } + + /// This calculates all output paths of a Derivation and updates the struct. + /// It requires the struct to be initially without output paths. + /// This means, self.outputs[$outputName].path needs to be `None`, + /// and self.environment[$outputName] needs to be an empty string. + /// + /// Output path calculation requires knowledge of the + /// [hash_derivation_modulo], which (in case of non-fixed-output + /// derivations) also requires knowledge of the [hash_derivation_modulo] of + /// input derivations (recursively). + /// + /// To avoid recursing and doing unnecessary calculation, we simply + /// ask the caller of this function to provide the result of the + /// [hash_derivation_modulo] call of the current [Derivation], + /// and leave it up to them to calculate it when needed. + /// + /// On completion, `self.environment[$outputName]` and + /// `self.outputs[$outputName].path` are set to the calculated output path for all + /// outputs. + /// + /// # Panics + /// + /// Panics if any output already has its path set. 
+ pub fn calculate_output_paths( + &mut self, + name: &str, + hash_derivation_modulo: &[u8; 32], + ) -> Result<(), DerivationError> { + // The fingerprint and hash differs per output + for (output_name, output) in self.outputs.iter_mut() { + // Assert that outputs are not yet populated, to avoid using this function wrongly. + // We don't also go over self.environment, but it's a sufficient + // footgun prevention mechanism. 
+ assert!(output.path.is_none()); + + let path_name = output_path_name(name, output_name); + + // For fixed output derivation we use [build_ca_path], otherwise we + // use [build_output_path] with [hash_derivation_modulo]. + let abs_store_path = if let Some(ref hwm) = output.ca_hash { + build_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(|e| { + DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e) + })? + } else { + build_output_path(hash_derivation_modulo, output_name, &path_name).map_err(|e| { + DerivationError::InvalidOutputDerivationPath( + output_name.to_string(), + store_path::BuildStorePathError::InvalidStorePath(e), + ) + })? + }; + + output.path = Some(abs_store_path.to_owned()); + self.environment.insert( + output_name.to_string(), + abs_store_path.to_absolute_path().into(), + ); + } + + Ok(()) + } +} + +/// Calculate the name part of the store path of a derivation [Output]. +/// +/// It's the name, and (if it's the non-out output), the output name +/// after a `-`. +fn output_path_name(derivation_name: &str, output_name: &str) -> String { + let mut output_path_name = derivation_name.to_string(); + if output_name != "out" { + output_path_name.push('-'); + output_path_name.push_str(output_name); + } + output_path_name +} + +/// For a [CAHash], return the "prefix" used for NAR purposes. +/// For [CAHash::Flat], this is an empty string, for [CAHash::Nar], it's "r:". +/// Panics for other [CAHash] kinds, as they're not valid in a derivation +/// context. 
+fn ca_kind_prefix(ca_hash: &CAHash) -> &'static str { + match ca_hash { + CAHash::Flat(_) => "", + CAHash::Nar(_) => "r:", + _ => panic!("invalid ca hash in derivation context: {:?}", ca_hash), + } +} diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs new file mode 100644 index 0000000000..266617f587 --- /dev/null +++ b/tvix/nix-compat/src/derivation/output.rs @@ -0,0 +1,189 @@ +use crate::nixhash::CAHash; +use crate::store_path::StorePathRef; +use crate::{derivation::OutputError, store_path::StorePath}; +use serde::de::Unexpected; +use serde::{Deserialize, Serialize}; +use serde_json::Map; +use std::borrow::Cow; + +/// References the derivation output. +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)] +pub struct Output { + /// Store path of build result. + pub path: Option<StorePath>, + + #[serde(flatten)] + pub ca_hash: Option<CAHash>, // we can only represent a subset here. +} + +impl<'de> Deserialize<'de> for Output { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let fields = Map::deserialize(deserializer)?; + let path: &str = fields + .get("path") + .ok_or(serde::de::Error::missing_field( + "`path` is missing but required for outputs", + ))? + .as_str() + .ok_or(serde::de::Error::invalid_type( + serde::de::Unexpected::Other("certainly not a string"), + &"a string", + ))?; + + let path = StorePathRef::from_absolute_path(path.as_bytes()) + .map_err(|_| serde::de::Error::invalid_value(Unexpected::Str(path), &"StorePath"))?; + Ok(Self { + path: Some(path.to_owned()), + ca_hash: CAHash::from_map::<D>(&fields)?, + }) + } +} + +impl Output { + pub fn is_fixed(&self) -> bool { + self.ca_hash.is_some() + } + + /// The output path as a string -- use `""` to indicate an unset output path. 
+ pub fn path_str(&self) -> Cow<str> { + match &self.path { + None => Cow::Borrowed(""), + Some(path) => Cow::Owned(path.to_absolute_path()), + } + } + + pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { + if let Some(fixed_output_hash) = &self.ca_hash { + match fixed_output_hash { + CAHash::Flat(_) | CAHash::Nar(_) => { + // all hashes allowed for Flat, and Nar. + } + _ => return Err(OutputError::InvalidCAHash(fixed_output_hash.clone())), + } + } + + if validate_output_paths && self.path.is_none() { + return Err(OutputError::MissingOutputPath); + } + Ok(()) + } +} + +/// This ensures that a potentially valid input addressed +/// output is deserialized as a non-fixed output. +#[test] +fn deserialize_valid_input_addressed_output() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(!output.is_fixed()); +} + +/// This ensures that a potentially valid fixed output +/// output deserializes fine as a fixed output. +#[test] +fn deserialize_valid_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(output.is_fixed()); +} + +/// This ensures that parsing an input with the invalid hash encoding +/// will result in a parsing failure. 
+#[test] +fn deserialize_with_error_invalid_hash_encoding_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hash": "IAMNOTVALIDNIXBASE32", + "hashAlgo": "r:sha256" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the wrong hash algo +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash algo but present hash will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_algo_fixed_output() { + // NOTE: the object must not end in a trailing comma. serde_json rejects + // trailing commas, which would make this test pass on a JSON syntax error + // rather than on the missing `hashAlgo` key it is meant to cover. + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash but present hash algo will result in a +/// parsing failure. 
+#[test] +fn deserialize_with_error_missing_hash_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +#[test] +fn serialize_deserialize() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + let s = serde_json::to_string(&output).expect("Serialize"); + let output2: Output = serde_json::from_str(&s).expect("must parse again"); + + assert_eq!(output, output2); +} + +#[test] +fn serialize_deserialize_fixed() { + let json_bytes = r#" + { + "path": "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + let s = serde_json::to_string_pretty(&output).expect("Serialize"); + let output2: Output = serde_json::from_str(&s).expect("must parse again"); + + assert_eq!(output, output2); +} diff --git a/tvix/nix-compat/src/derivation/parse_error.rs b/tvix/nix-compat/src/derivation/parse_error.rs new file mode 100644 index 0000000000..fc97f1a988 --- /dev/null +++ b/tvix/nix-compat/src/derivation/parse_error.rs @@ -0,0 +1,87 @@ +//! This contains error and result types that can happen while parsing +//! Derivations from ATerm. 
+use nom::IResult; + +use crate::{ + nixhash, + store_path::{self, StorePath}, +}; + +pub type NomResult<I, O> = IResult<I, O, NomError<I>>; + +#[derive(Debug, thiserror::Error, PartialEq)] +pub enum ErrorKind { + /// duplicate key in map + #[error("duplicate map key: {0}")] + DuplicateMapKey(String), + + /// Input derivation has two outputs with the same name + #[error("duplicate output name {1} for input derivation {0}")] + DuplicateInputDerivationOutputName(String, String), + + #[error("duplicate input source: {0}")] + DuplicateInputSource(StorePath), + + #[error("nix hash error: {0}")] + NixHashError(nixhash::Error), + + #[error("store path error: {0}")] + StorePathError(#[from] store_path::Error), + + #[error("nom error: {0:?}")] + Nom(nom::error::ErrorKind), +} + +/// Our own error type to pass along parser-related errors. +#[derive(Debug, PartialEq)] +pub struct NomError<I> { + /// position of the error in the input data + pub input: I, + /// error code + pub code: ErrorKind, +} + +impl<I, E> nom::error::FromExternalError<I, E> for NomError<I> { + fn from_external_error(input: I, kind: nom::error::ErrorKind, _e: E) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } +} + +impl<I> nom::error::ParseError<I> for NomError<I> { + fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } + + // FUTUREWORK: implement, so we have support for backtracking through the + // parse tree? + fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +/// This wraps a [nom::error::Error] into our error. +impl<I> From<nom::error::Error<I>> for NomError<I> { + fn from(value: nom::error::Error<I>) -> Self { + Self { + input: value.input, + code: ErrorKind::Nom(value.code), + } + } +} + +/// This essentially implements +/// `From<nom::Err<nom::error::Error<I>>>` for `nom::Err<NomError<I>>`, +/// which we can't because `nom::Err<_>` is a foreign type. 
+pub(crate) fn into_nomerror<I>(e: nom::Err<nom::error::Error<I>>) -> nom::Err<NomError<I>> { + match e { + nom::Err::Incomplete(n) => nom::Err::Incomplete(n), + nom::Err::Error(e) => nom::Err::Error(e.into()), + nom::Err::Failure(e) => nom::Err::Failure(e.into()), + } +} diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs new file mode 100644 index 0000000000..2775294960 --- /dev/null +++ b/tvix/nix-compat/src/derivation/parser.rs @@ -0,0 +1,585 @@ +//! This module constructs a [Derivation] by parsing its [ATerm][] +//! serialization. +//! +//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use bstr::BString; +use nom::bytes::complete::tag; +use nom::character::complete::char as nomchar; +use nom::combinator::{all_consuming, map_res}; +use nom::multi::{separated_list0, separated_list1}; +use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use std::collections::{btree_map, BTreeMap, BTreeSet}; +use thiserror; + +use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; +use crate::derivation::{write, CAHash, Derivation, Output}; +use crate::store_path::{self, StorePath, StorePathRef}; +use crate::{aterm, nixhash}; + +#[derive(Debug, thiserror::Error)] +pub enum Error<I> { + #[error("parsing error: {0}")] + Parser(NomError<I>), + #[error("premature EOF")] + Incomplete, + #[error("validation error: {0}")] + Validation(super::DerivationError), +} + +pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> { + match all_consuming(parse_derivation)(i) { + Ok((rest, derivation)) => { + // this shouldn't happen, as all_consuming shouldn't return. 
+ debug_assert!(rest.is_empty()); + + // invoke validate + derivation.validate(true).map_err(Error::Validation)?; + + Ok(derivation) + } + Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete), + Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(Error::Parser(e)), + } +} + +/// Consume a string containing the algo, and optionally a `r:` +/// prefix, and a digest (bytes), return a [CAHash::Nar] or [CAHash::Flat]. +fn from_algo_and_mode_and_digest<B: AsRef<[u8]>>( + algo_and_mode: &str, + digest: B, +) -> crate::nixhash::NixHashResult<CAHash> { + Ok(match algo_and_mode.strip_prefix("r:") { + Some(algo) => nixhash::CAHash::Nar(nixhash::from_algo_and_digest( + algo.try_into()?, + digest.as_ref(), + )?), + None => nixhash::CAHash::Flat(nixhash::from_algo_and_digest( + algo_and_mode.try_into()?, + digest.as_ref(), + )?), + }) +} + +/// Parse one output in ATerm. This is 4 string fields inside parans: +/// output name, output path, algo (and mode), digest. +/// Returns the output name and [Output] struct. +fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> { + delimited( + nomchar('('), + map_res( + |i| { + tuple(( + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + aterm::parse_bstr_field, + ))(i) + .map_err(into_nomerror) + }, + |(output_name, output_path, algo_and_mode, encoded_digest)| { + // convert these 4 fields into an [Output]. 
+ let ca_hash_res = { + if algo_and_mode.is_empty() && encoded_digest.is_empty() { + None + } else { + match data_encoding::HEXLOWER.decode(&encoded_digest) { + Ok(digest) => { + Some(from_algo_and_mode_and_digest(&algo_and_mode, digest)) + } + Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))), + } + } + } + .transpose(); + + match ca_hash_res { + Ok(hash_with_mode) => Ok(( + output_name, + Output { + // TODO: Check if allowing empty paths here actually makes sense + // or we should make this code stricter. + path: if output_path.is_empty() { + None + } else { + Some(string_to_store_path(i, output_path)?) + }, + ca_hash: hash_with_mode, + }, + )), + Err(e) => Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::NixHashError(e), + })), + } + }, + ), + nomchar(')'), + )(i) +} + +/// Parse multiple outputs in ATerm. This is a list of things acccepted by +/// parse_output, and takes care of turning the (String, Output) returned from +/// it to a BTreeMap. +/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are +/// 4-tuples. +fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> { + let res = delimited( + nomchar('['), + separated_list1(tag(","), parse_output), + nomchar(']'), + )(i); + + match res { + Ok((rst, outputs_lst)) => { + let mut outputs: BTreeMap<String, Output> = BTreeMap::default(); + for (output_name, output) in outputs_lst.into_iter() { + if outputs.contains_key(&output_name) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(output_name), + })); + } + outputs.insert(output_name, output); + } + Ok((rst, outputs)) + } + // pass regular parse errors along + Err(e) => Err(e), + } +} + +fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap<StorePath, BTreeSet<String>>> { + let (i, input_derivations_list) = parse_kv::<Vec<String>, _>(aterm::parse_str_list)(i)?; + + // This is a HashMap of drv paths to a list of output names. 
+ let mut input_derivations: BTreeMap<StorePath, BTreeSet<String>> = BTreeMap::new(); + + for (input_derivation, output_names) in input_derivations_list { + let mut new_output_names = BTreeSet::new(); + for output_name in output_names.into_iter() { + if new_output_names.contains(&output_name) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateInputDerivationOutputName( + input_derivation.to_string(), + output_name.to_string(), + ), + })); + } + new_output_names.insert(output_name); + } + + let input_derivation: StorePath = string_to_store_path(i, input_derivation)?; + + input_derivations.insert(input_derivation, new_output_names); + } + + Ok((i, input_derivations)) +} + +fn parse_input_sources(i: &[u8]) -> NomResult<&[u8], BTreeSet<StorePath>> { + let (i, input_sources_lst) = aterm::parse_str_list(i).map_err(into_nomerror)?; + + let mut input_sources: BTreeSet<_> = BTreeSet::new(); + for input_source in input_sources_lst.into_iter() { + let input_source: StorePath = string_to_store_path(i, input_source)?; + if input_sources.contains(&input_source) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateInputSource(input_source), + })); + } else { + input_sources.insert(input_source); + } + } + + Ok((i, input_sources)) +} + +fn string_to_store_path( + i: &[u8], + path_str: String, +) -> Result<StorePath, nom::Err<NomError<&[u8]>>> { + #[cfg(debug_assertions)] + let path_str2 = path_str.clone(); + + let path: StorePath = StorePathRef::from_absolute_path(path_str.as_bytes()) + .map_err(|e: store_path::Error| { + nom::Err::Failure(NomError { + input: i, + code: e.into(), + }) + })? 
+ .to_owned(); + + #[cfg(debug_assertions)] + assert_eq!(path_str2, path.to_absolute_path()); + + Ok(path) +} + +pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> { + use nom::Parser; + preceded( + tag(write::DERIVATION_PREFIX), + delimited( + // inside parens + nomchar('('), + // tuple requires all errors to be of the same type, so we need to be a + // bit verbose here wrapping generic IResult into [NomATermResult]. + tuple(( + // parse outputs + terminated(parse_outputs, nomchar(',')), + // // parse input derivations + terminated(parse_input_derivations, nomchar(',')), + // // parse input sources + terminated(parse_input_sources, nomchar(',')), + // // parse system + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse builder + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse arguments + |i| terminated(aterm::parse_str_list, nomchar(','))(i).map_err(into_nomerror), + // parse environment + parse_kv::<BString, _>(aterm::parse_bstr_field), + )), + nomchar(')'), + ) + .map( + |( + outputs, + input_derivations, + input_sources, + system, + builder, + arguments, + environment, + )| { + Derivation { + arguments, + builder, + environment, + input_derivations, + input_sources, + outputs, + system, + } + }, + ), + )(i) +} + +/// Parse a list of key/value pairs into a BTreeMap. +/// The parser for the values can be passed in. +/// In terms of ATerm, this is just a 2-tuple, +/// but we have the additional restriction that the first element needs to be +/// unique across all tuples. 
+pub(crate) fn parse_kv<'a, V, VF>( + vf: VF, +) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap<String, V>> + 'static +where + VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static, +{ + move |i| + // inside brackets + delimited( + nomchar('['), + |ii| { + let res = separated_list0( + nomchar(','), + // inside parens + delimited( + nomchar('('), + separated_pair( + aterm::parse_string_field, + nomchar(','), + vf.clone(), + ), + nomchar(')'), + ), + )(ii).map_err(into_nomerror); + + match res { + Ok((rest, pairs)) => { + let mut kvs: BTreeMap<String, V> = BTreeMap::new(); + for (k, v) in pairs.into_iter() { + // collect the 2-tuple to a BTreeMap, + // and fail if the key was already seen before. + match kvs.entry(k) { + btree_map::Entry::Vacant(e) => { e.insert(v); }, + btree_map::Entry::Occupied(e) => { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(e.key().clone()), + })); + } + } + } + Ok((rest, kvs)) + } + Err(e) => Err(e), + } + }, + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use crate::store_path::StorePathRef; + use std::collections::{BTreeMap, BTreeSet}; + + use crate::{ + derivation::{ + parse_error::ErrorKind, parser::from_algo_and_mode_and_digest, CAHash, NixHash, Output, + }, + store_path::StorePath, + }; + use bstr::{BString, ByteSlice}; + use hex_literal::hex; + use lazy_static::lazy_static; + use rstest::rstest; + + const DIGEST_SHA256: [u8; 32] = + hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"); + + lazy_static! 
{ + pub static ref NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256); + static ref EXP_MULTI_OUTPUTS: BTreeMap<String, Output> = { + let mut b = BTreeMap::new(); + b.insert( + "lib".to_string(), + Output { + path: Some( + StorePath::from_bytes( + b"2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib", + ) + .unwrap(), + ), + ca_hash: None, + }, + ); + b.insert( + "out".to_string(), + Output { + path: Some( + StorePath::from_bytes( + b"55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".as_bytes(), + ) + .unwrap(), + ), + ca_hash: None, + }, + ); + b + }; + static ref EXP_AB_MAP: BTreeMap<String, BString> = { + let mut b = BTreeMap::new(); + b.insert("a".to_string(), b"1".as_bstr().to_owned()); + b.insert("b".to_string(), b"2".as_bstr().to_owned()); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE: BTreeMap<StorePath, BTreeSet<String>> = { + let mut b = BTreeMap::new(); + b.insert( + StorePath::from_bytes(b"8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv") + .unwrap(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names + }, + ); + b.insert( + StorePath::from_bytes(b"p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv") + .unwrap(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names.insert("lib".to_string()); + output_names + }, + ); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE_ATERM: String = { + format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"lib\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ) + }; + static ref EXP_INPUT_SOURCES_SIMPLE: BTreeSet<String> = { + let mut b = BTreeSet::new(); + b.insert("/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string()); + b.insert("/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib".to_string()); + b + }; + } + + /// Ensure parsing KVs works + #[rstest] + #[case::empty(b"[]", 
&BTreeMap::new(), b"")] + #[case::simple(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b"")] + fn parse_kv( + #[case] input: &'static [u8], + #[case] expected: &BTreeMap<String, BString>, + #[case] exp_rest: &[u8], + ) { + let (rest, parsed) = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(*expected, parsed); + } + + /// Ensures the kv parser complains about duplicate map keys + #[test] + fn parse_kv_fail_dup_keys() { + let input: &'static [u8] = b"[(\"a\",\"1\"),(\"a\",\"2\")]"; + let e = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!(ErrorKind::DuplicateMapKey("a".to_string()), e.code); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input derivations works. + #[rstest] + #[case::empty(b"[]", &BTreeMap::new())] + #[case::simple(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE)] + fn parse_input_derivations( + #[case] input: &'static [u8], + #[case] expected: &BTreeMap<StorePath, BTreeSet<String>>, + ) { + let (rest, parsed) = super::parse_input_derivations(input).expect("must parse"); + + assert_eq!(expected, &parsed, "parsed mismatch"); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input derivation parser complains about duplicate output names + #[test] + fn parse_input_derivations_fail_dup_output_names() { + let input_str = format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"out\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ); + let e = super::parse_input_derivations(input_str.as_bytes()).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputDerivationOutputName( + 
"/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + "out".to_string() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input sources works + #[rstest] + #[case::empty(b"[]", &BTreeSet::new())] + #[case::simple(b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out\",\"/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib\"]", &EXP_INPUT_SOURCES_SIMPLE)] + fn parse_input_sources(#[case] input: &'static [u8], #[case] expected: &BTreeSet<String>) { + let (rest, parsed) = super::parse_input_sources(input).expect("must parse"); + + assert_eq!( + expected, + &parsed + .iter() + .map(StorePath::to_absolute_path) + .collect::<BTreeSet<_>>(), + "parsed mismatch" + ); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input sources parser complains about duplicate input sources + #[test] + fn parse_input_sources_fail_dup_keys() { + let input: &'static [u8] = b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\",\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\"]"; + let e = super::parse_input_sources(input).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputSource( + StorePathRef::from_absolute_path( + "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo".as_bytes() + ) + .unwrap() + .to_owned() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + #[rstest] + #[case::simple( + br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#, + ("out".to_string(), Output { + path: Some( + StorePathRef::from_absolute_path("/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".as_bytes()).unwrap().to_owned()), + ca_hash: None + }) + )] + #[case::fod( + br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#, + ("out".to_string(), Output { + path: Some( + StorePathRef::from_absolute_path( + 
"/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".as_bytes()).unwrap().to_owned()), + ca_hash: Some(from_algo_and_mode_and_digest("r:sha256", + data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() ).unwrap()), + }) + )] + fn parse_output(#[case] input: &[u8], #[case] expected: (String, Output)) { + let (rest, parsed) = super::parse_output(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(expected, parsed); + } + + #[rstest] + #[case::multi_out( + br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#, + &EXP_MULTI_OUTPUTS + )] + fn parse_outputs(#[case] input: &[u8], #[case] expected: &BTreeMap<String, Output>) { + let (rest, parsed) = super::parse_outputs(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(*expected, parsed); + } + + #[rstest] + #[case::sha256_flat("sha256", &DIGEST_SHA256, CAHash::Flat(NIXHASH_SHA256.clone()))] + #[case::sha256_recursive("r:sha256", &DIGEST_SHA256, CAHash::Nar(NIXHASH_SHA256.clone()))] + fn test_from_algo_and_mode_and_digest( + #[case] algo_and_mode: &str, + #[case] digest: &[u8], + #[case] expected: CAHash, + ) { + assert_eq!( + expected, + from_algo_and_mode_and_digest(algo_and_mode, digest).unwrap() + ); + } + + #[test] + fn from_algo_and_mode_and_digest_failure() { + assert!(from_algo_and_mode_and_digest("r:sha256", []).is_err()); + assert!(from_algo_and_mode_and_digest("ha256", DIGEST_SHA256).is_err()); + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv new file mode 100644 index 0000000000..072561a29e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv @@ -0,0 +1 @@ 
+Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("name","bar"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv new file mode 100644 index 0000000000..a4fea3c5f4 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("outputHash","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json new file mode 100644 index 0000000000..c8bbc4cbb5 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "outputHash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": 
"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256", + "path": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv new file mode 100644 index 0000000000..f0d9230a5a --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json","","")],[],[],":",":",[],[("builder",":"),("json","{\"hello\":\"moto\\n\"}"),("name","nested-json"),("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json new file mode 100644 index 0000000000..9cb0b43b4c --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json @@ -0,0 +1,19 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "json": "{\"hello\":\"moto\\n\"}", + "name": "nested-json", + "out": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv new file mode 100644 index 0000000000..a2cf9d31f9 --- /dev/null +++ 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json new file mode 100644 index 0000000000..957a85ccab --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv new file mode 100644 index 0000000000..bbe88c02c7 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode","","")],[],[],":",":",[],[("builder",":"),("letters","räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 
今日は\n🌮\n"),("name","unicode"),("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json new file mode 100644 index 0000000000..f8f33c1bba --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json @@ -0,0 +1,19 @@ +{ + "outputs": { + "out": { + "path": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "letters": "räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 今日は\n🌮\n", + "name": "unicode", + "out": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode", + "system": ":" + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv new file mode 100644 index 0000000000..4b9338c0b9 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs","","")],[],[],":",":",[],[("__json","{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}"),("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json new file mode 100644 index 0000000000..74e3d7df55 --- /dev/null +++ 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json @@ -0,0 +1,16 @@ +{ + "args": [], + "builder": ":", + "env": { + "__json": "{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}", + "out": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv new file mode 100644 index 0000000000..1699c2a75e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo","","")],[("/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("builder",":"),("name","foo"),("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json new file mode 100644 index 0000000000..831d27956d --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + 
"outputs": { + "out": { + "path": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv new file mode 100644 index 0000000000..523612238c --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv @@ -0,0 +1 @@ +Derive([("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")],[],[],":",":",[],[("builder",":"),("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib"),("name","has-multi-out"),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out"),("outputs","out lib"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json new file mode 100644 index 0000000000..0bd7a2991c --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "lib": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib", + "name": "has-multi-out", + "out": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out", + "outputs": "out lib", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "lib": { + "path": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + }, + "out": { + "path": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out" + } + }, + "system": ":" +} diff --git 
a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv new file mode 100644 index 0000000000..6a7a35c58c --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","cp1252"),("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json new file mode 100644 index 0000000000..9d6ba8b797 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv": { + "outputs": { + "out": { + "path": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "cp1252", + "out": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv new file mode 100644 index 0000000000..559e93ed0e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv @@ -0,0 +1 @@ 
+Derive([("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar","r:sha1","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("outputHash","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),("outputHashAlgo","sha1"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json new file mode 100644 index 0000000000..e297d27159 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "outputHash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "outputHashAlgo": "sha1", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "hashAlgo": "r:sha1", + "path": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv new file mode 100644 index 0000000000..b19fd8eb2c --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","latin1"),("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1"),("system",":")]) \ No newline at end of file diff --git 
a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json new file mode 100644 index 0000000000..ffd5c08da8 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv": { + "outputs": { + "out": { + "path": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "latin1", + "out": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs new file mode 100644 index 0000000000..48d4e8926a --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -0,0 +1,436 @@ +use super::parse_error::ErrorKind; +use crate::derivation::output::Output; +use crate::derivation::parse_error::NomError; +use crate::derivation::parser::Error; +use crate::derivation::Derivation; +use crate::store_path::StorePath; +use bstr::{BStr, BString}; +use hex_literal::hex; +use rstest::rstest; +use std::collections::BTreeSet; +use std::fs; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; + +#[rstest] +fn check_serialization( + #[files("src/derivation/tests/derivation_tests/ok/*.drv")] + #[exclude("(cp1252)|(latin1)")] // skip JSON files known to fail parsing + path_to_drv_file: PathBuf, +) { + let json_bytes = + fs::read(path_to_drv_file.with_extension("drv.json")).expect("unable to read JSON"); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let mut serialized_derivation = 
Vec::new(); + derivation.serialize(&mut serialized_derivation).unwrap(); + + let expected = fs::read(&path_to_drv_file).expect("unable to read .drv"); + + assert_eq!(expected, BStr::new(&serialized_derivation)); +} + +#[rstest] +fn validate( + #[files("src/derivation/tests/derivation_tests/ok/*.drv")] + #[exclude("(cp1252)|(latin1)")] // skip JSON files known to fail parsing + path_to_drv_file: PathBuf, +) { + let json_bytes = + fs::read(path_to_drv_file.with_extension("drv.json")).expect("unable to read JSON"); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + derivation + .validate(true) + .expect("derivation failed to validate") +} + +#[rstest] +fn check_to_aterm_bytes( + #[files("src/derivation/tests/derivation_tests/ok/*.drv")] + #[exclude("(cp1252)|(latin1)")] // skip JSON files known to fail parsing + path_to_drv_file: PathBuf, +) { + let json_bytes = + fs::read(path_to_drv_file.with_extension("drv.json")).expect("unable to read JSON"); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let expected = fs::read(&path_to_drv_file).expect("unable to read .drv"); + + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); +} + +/// Reads in derivations in ATerm representation, parses with that parser, +/// then compares the structs with the ones obtained by parsing the JSON +/// representations. +#[rstest] +fn from_aterm_bytes( + #[files("src/derivation/tests/derivation_tests/ok/*.drv")] path_to_drv_file: PathBuf, +) { + // Read in ATerm representation. + let aterm_bytes = fs::read(&path_to_drv_file).expect("unable to read .drv"); + let parsed_drv = Derivation::from_aterm_bytes(&aterm_bytes).expect("must succeed"); + + // For where we're able to load JSON fixtures, parse them and compare the structs. + // For where we're not, compare the bytes manually. 
+ if path_to_drv_file.file_name().is_some_and(|s| { + s.as_encoded_bytes().ends_with(b"cp1252.drv") + || s.as_encoded_bytes().ends_with(b"latin1.drv") + }) { + assert_eq!( + &[0xc5, 0xc4, 0xd6][..], + parsed_drv.environment.get("chars").unwrap(), + "expected bytes to match", + ); + } else { + let json_bytes = + fs::read(path_to_drv_file.with_extension("drv.json")).expect("unable to read JSON"); + let fixture_derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!(fixture_derivation, parsed_drv); + } + + // Finally, write the ATerm serialization to another buffer, ensuring it's + // stable (and we compare all fields we couldn't compare in the non-utf8 + // derivations) + + assert_eq!( + &aterm_bytes, + &BString::new(parsed_drv.to_aterm_bytes()), + "expected serialized ATerm to match initial input" + ); +} + +#[test] +fn from_aterm_bytes_duplicate_map_key() { + let buf: Vec<u8> = + fs::read(format!("{}/{}", RESOURCES_PATHS, "duplicate.drv")).expect("unable to read .drv"); + + let err = Derivation::from_aterm_bytes(&buf).expect_err("must fail"); + + match err { + Error::Parser(NomError { input: _, code }) => { + assert_eq!(code, ErrorKind::DuplicateMapKey("name".to_string())); + } + _ => { + panic!("unexpected error"); + } + } +} + +/// Read in a derivation in ATerm, but add some garbage at the end. +/// Ensure the parser detects and fails in this case. 
+#[test] +fn from_aterm_bytes_trailer() { + let mut buf: Vec<u8> = fs::read(format!( + "{}/ok/{}", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )) + .expect("unable to read .drv"); + + buf.push(0x00); + + Derivation::from_aterm_bytes(&buf).expect_err("must fail"); +} + +#[rstest] +#[case::fixed_sha256("bar", "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")] +#[case::simple_sha256("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv")] +#[case::fixed_sha1("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")] +#[case::simple_sha1("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv")] +#[case::multiple_outputs("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv")] +#[case::structured_attrs( + "structured-attrs", + "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv" +)] +#[case::unicode("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv")] +fn derivation_path(#[case] name: &str, #[case] expected_path: &str) { + let json_bytes = fs::read(format!("{}/ok/{}.json", RESOURCES_PATHS, expected_path)) + .expect("unable to read JSON"); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!( + derivation.calculate_derivation_path(name).unwrap(), + StorePath::from_str(expected_path).unwrap() + ); +} + +/// This trims all output paths from a Derivation struct, +/// by setting outputs[$outputName].path and environment[$outputName] to the empty string. 
+fn derivation_without_output_paths(derivation: &Derivation) -> Derivation { + let mut trimmed_env = derivation.environment.clone(); + let mut trimmed_outputs = derivation.outputs.clone(); + + for (output_name, output) in &derivation.outputs { + trimmed_env.insert(output_name.clone(), "".into()); + assert!(trimmed_outputs.contains_key(output_name)); + trimmed_outputs.insert( + output_name.to_string(), + Output { + path: None, + ..output.clone() + }, + ); + } + + // replace environment and outputs with the trimmed variants + Derivation { + environment: trimmed_env, + outputs: trimmed_outputs, + ..derivation.clone() + } +} + +#[rstest] +#[case::fixed_sha256("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"))] +#[case::fixed_sha1("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"))] +fn hash_derivation_modulo_fixed(#[case] drv_path: &str, #[case] expected_digest: [u8; 32]) { + // read in the fixture + let json_bytes = + fs::read(format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)).expect("unable to read JSON"); + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + let actual = drv.hash_derivation_modulo(|_| panic!("must not be called")); + assert_eq!(expected_digest, actual); +} + +/// This reads a Derivation (in A-Term), trims out all fields containing +/// calculated output paths, then triggers the output path calculation and +/// compares the struct to match what was originally read in. 
+#[rstest] +#[case::fixed_sha256("bar", "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")] +#[case::simple_sha256("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv")] +#[case::fixed_sha1("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")] +#[case::simple_sha1("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv")] +#[case::multiple_outputs("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv")] +#[case::structured_attrs( + "structured-attrs", + "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv" +)] +#[case::unicode("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv")] +#[case::cp1252("cp1252", "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv")] +#[case::latin1("latin1", "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv")] +fn output_paths(#[case] name: &str, #[case] drv_path_str: &str) { + // read in the derivation + let expected_derivation = Derivation::from_aterm_bytes( + &fs::read(format!("{}/ok/{}", RESOURCES_PATHS, drv_path_str)).expect("unable to read .drv"), + ) + .expect("must succeed"); + + // create a version without output paths, simulating we constructed the + // struct. + let mut derivation = derivation_without_output_paths(&expected_derivation); + + // calculate the hash_derivation_modulo of Derivation + // We don't expect the lookup function to be called for most derivations. 
+ let actual_hash_derivation_modulo = derivation.hash_derivation_modulo(|parent_drv_path| { + // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv + // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv + if name == "foo" + && ((drv_path_str == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" + && parent_drv_path.to_string() == "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + || (drv_path_str == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" + && parent_drv_path.to_string() == "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + { + // do the lookup, by reading in the fixture of the requested + // drv_name, and calculating its drv replacement (on the non-stripped version) + // In a real-world scenario you would have already done this during construction. + + let json_bytes = fs::read(format!( + "{}/ok/{}.json", + RESOURCES_PATHS, + Path::new(&parent_drv_path.to_string()) + .file_name() + .unwrap() + .to_string_lossy() + )) + .expect("unable to read JSON"); + + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + // calculate hash_derivation_modulo for each parent. + // This may not trigger subsequent requests, as both parents are FOD. 
+ drv.hash_derivation_modulo(|_| panic!("must not lookup")) + } else { + // we only expect this to be called in the "foo" testcase, for the "bar derivations" + panic!("may only be called for foo testcase on bar derivations"); + } + }); + + derivation + .calculate_output_paths(name, &actual_hash_derivation_modulo) + .unwrap(); + + // The derivation should now look like it was before + assert_eq!(expected_derivation, derivation); +} + +/// Exercises the output path calculation functions like a constructing client +/// (an implementation of builtins.derivation) would do: +/// +/// ```nix +/// rec { +/// bar = builtins.derivation { +/// name = "bar"; +/// builder = ":"; +/// system = ":"; +/// outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; +/// outputHashAlgo = "sha256"; +/// outputHashMode = "recursive"; +/// }; +/// +/// foo = builtins.derivation { +/// name = "foo"; +/// builder = ":"; +/// system = ":"; +/// inherit bar; +/// }; +/// } +/// ``` +/// It first assembles the bar derivation, does the output path calculation on +/// it, then continues with the foo derivation. +/// +/// The code ensures the resulting Derivations match our fixtures. 
+#[test] +fn output_path_construction() { + // create the bar derivation + let mut bar_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble bar env + let bar_env = &mut bar_drv.environment; + bar_env.insert("builder".to_string(), ":".into()); + bar_env.insert("name".to_string(), "bar".into()); + bar_env.insert("out".to_string(), "".into()); // will be calculated + bar_env.insert( + "outputHash".to_string(), + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(), + ); + bar_env.insert("outputHashAlgo".to_string(), "sha256".into()); + bar_env.insert("outputHashMode".to_string(), "recursive".into()); + bar_env.insert("system".to_string(), ":".into()); + + // assemble bar outputs + bar_drv.outputs.insert( + "out".to_string(), + Output { + path: None, // will be calculated + ca_hash: Some(crate::nixhash::CAHash::Nar( + crate::nixhash::from_algo_and_digest( + crate::nixhash::HashAlgo::Sha256, + &data_encoding::HEXLOWER + .decode( + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + .as_bytes(), + ) + .unwrap(), + ) + .unwrap(), + )), + }, + ); + + // calculate bar output paths + let bar_calc_result = bar_drv.calculate_output_paths( + "bar", + &bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")), + ); + assert!(bar_calc_result.is_ok()); + + // ensure it matches our bar fixture + let bar_json_bytes = fs::read(format!( + "{}/ok/{}.json", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )) + .expect("unable to read JSON"); + let bar_drv_expected: Derivation = + serde_json::from_slice(&bar_json_bytes).expect("must deserialize"); + assert_eq!(bar_drv_expected, bar_drv); + + // now construct foo, which requires bar_drv + // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. 
+ let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; + let bar_drv_hash_derivation_modulo = + bar_drv.hash_derivation_modulo(|_| panic!("is FOD, should not lookup")); + + let bar_drv_path = bar_drv + .calculate_derivation_path("bar") + .expect("must succeed"); + + // create foo derivation + let mut foo_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble foo env + let foo_env = &mut foo_drv.environment; + // `bar` is set to the absolute store path of bar's `out` output. + foo_env.insert( + "bar".to_string(), + bar_output_path + .as_ref() + .unwrap() + .to_absolute_path() + .as_bytes() + .into(), + ); + foo_env.insert("builder".to_string(), ":".into()); + foo_env.insert("name".to_string(), "foo".into()); + foo_env.insert("out".to_string(), "".into()); // will be calculated + foo_env.insert("system".to_string(), ":".into()); + + // assemble foo outputs + foo_drv.outputs.insert( + "out".to_string(), + Output { + path: None, // will be calculated + ca_hash: None, + }, + ); + + // assemble foo input_derivations + foo_drv + .input_derivations + .insert(bar_drv_path, BTreeSet::from(["out".to_string()])); + + // calculate foo output paths + let foo_calc_result = foo_drv.calculate_output_paths( + "foo", + &foo_drv.hash_derivation_modulo(|drv_path| { + if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { + panic!("lookup called with unexpected drv_path: {}", drv_path); + } + bar_drv_hash_derivation_modulo + }), + ); + assert!(foo_calc_result.is_ok()); + + // ensure it matches our foo fixture + let foo_json_bytes = fs::read(format!( + "{}/ok/{}.json", + RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", + )) + .expect("unable to read JSON"); + let foo_drv_expected: Derivation = + serde_json::from_slice(&foo_json_bytes).expect("must deserialize"); + assert_eq!(foo_drv_expected, foo_drv); + + assert_eq!( + 
StorePath::from_str("4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv").expect("must succeed"), + foo_drv + .calculate_derivation_path("foo") + .expect("must succeed") + ); +} diff --git a/tvix/nix-compat/src/derivation/validate.rs b/tvix/nix-compat/src/derivation/validate.rs new file mode 100644 index 0000000000..e7b24d84ee --- /dev/null +++ b/tvix/nix-compat/src/derivation/validate.rs @@ -0,0 +1,141 @@ +use crate::derivation::{Derivation, DerivationError}; +use crate::store_path; + +impl Derivation { + /// validate ensures a Derivation struct is properly populated, + /// and returns a [DerivationError] if not. + /// + /// If `validate_output_paths` is set to false, the output paths are + /// excluded from validation. + /// + /// This is helpful to validate struct population before invoking + /// [Derivation::calculate_output_paths]. + pub fn validate(&self, validate_output_paths: bool) -> Result<(), DerivationError> { + // Ensure there is at least one output + if self.outputs.is_empty() { + return Err(DerivationError::NoOutputs()); + } + + // Validate all outputs + for (output_name, output) in &self.outputs { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions of + // store paths will fail the [store_path::validate_name] check. 
+ if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidOutputName(output_name.to_string())); + } + + if output.is_fixed() { + if self.outputs.len() != 1 { + return Err(DerivationError::MoreThanOneOutputButFixed()); + } + if output_name != "out" { + return Err(DerivationError::InvalidOutputNameForFixed( + output_name.to_string(), + )); + } + } + + if let Err(e) = output.validate(validate_output_paths) { + return Err(DerivationError::InvalidOutput(output_name.to_string(), e)); + } + } + + // Validate all input_derivations + for (input_derivation_path, output_names) in &self.input_derivations { + // Validate input_derivation_path: its name must end with `.drv` + if !input_derivation_path.name().ends_with(".drv") { + return Err(DerivationError::InvalidInputDerivationPrefix( + input_derivation_path.to_string(), + )); + } + + if output_names.is_empty() { + return Err(DerivationError::EmptyInputDerivationOutputNames( + input_derivation_path.to_string(), + )); + } + + for output_name in output_names.iter() { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions of + // store paths will fail the [store_path::validate_name] check. 
+ if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidInputDerivationOutputName( + input_derivation_path.to_string(), + output_name.to_string(), + )); + } + } + } + + // validate platform: `system` must not be empty + if self.system.is_empty() { + return Err(DerivationError::InvalidPlatform(self.system.to_string())); + } + + // validate builder: `builder` must not be empty + if self.builder.is_empty() { + return Err(DerivationError::InvalidBuilder(self.builder.to_string())); + } + + // validate env, none of the keys may be empty. + // We skip the `name` validation seen in go-nix. + for k in self.environment.keys() { + if k.is_empty() { + return Err(DerivationError::InvalidEnvironmentKey(k.to_string())); + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::collections::BTreeMap; + + use crate::derivation::{CAHash, Derivation, Output}; + + /// Regression test: produce a Derivation that's almost valid, except its + /// fixed-output output uses a [CAHash::Text] hash, and ensure it's rejected. + #[test] + fn output_validate() { + let mut outputs = BTreeMap::new(); + outputs.insert( + "out".to_string(), + Output { + path: None, + ca_hash: Some(CAHash::Text([0; 32])), // This is disallowed + }, + ); + + let drv = Derivation { + arguments: vec![], + builder: "/bin/sh".to_string(), + outputs, + system: "x86_64-linux".to_string(), + ..Default::default() + }; + + drv.validate(false).expect_err("must fail"); + } +} diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs new file mode 100644 index 0000000000..735b781574 --- /dev/null +++ b/tvix/nix-compat/src/derivation/write.rs @@ -0,0 +1,257 @@ +//! This module implements the serialisation of derivations into the +//! [ATerm][] format used by C++ Nix. +//! +//! 
[ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use crate::aterm::escape_bytes; +use crate::derivation::{ca_kind_prefix, output::Output}; +use crate::nixbase32; +use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH}; +use bstr::BString; +use data_encoding::HEXLOWER; + +use std::{ + collections::{BTreeMap, BTreeSet}, + io, + io::Error, + io::Write, +}; + +pub const DERIVATION_PREFIX: &str = "Derive"; +pub const PAREN_OPEN: char = '('; +pub const PAREN_CLOSE: char = ')'; +pub const BRACKET_OPEN: char = '['; +pub const BRACKET_CLOSE: char = ']'; +pub const COMMA: char = ','; +pub const QUOTE: char = '"'; + +/// Something that can be written as ATerm. +/// +/// Note that we mostly use explicit `write_*` calls +/// instead since the serialization of the items depends on +/// the context a lot. +pub(crate) trait AtermWriteable { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>; + + fn aterm_bytes(&self) -> Vec<u8> { + let mut bytes = Vec::new(); + self.aterm_write(&mut bytes) + .expect("unexpected write errors to Vec"); + bytes + } +} + +impl AtermWriteable for StorePathRef<'_> { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + write_char(writer, QUOTE)?; + writer.write_all(STORE_DIR_WITH_SLASH.as_bytes())?; + writer.write_all(nixbase32::encode(self.digest()).as_bytes())?; + write_char(writer, '-')?; + writer.write_all(self.name().as_bytes())?; + write_char(writer, QUOTE)?; + Ok(()) + } +} + +impl AtermWriteable for StorePath { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + let r: StorePathRef = self.into(); + r.aterm_write(writer) + } +} + +impl AtermWriteable for String { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + write_field(writer, self, true) + } +} + +impl AtermWriteable for [u8; 32] { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + write_field(writer, HEXLOWER.encode(self), false) + } +} + 
+/// Writes the UTF-8 encoding of the character `c` to the writer. +pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { + let mut buf = [0; 4]; + let b = c.encode_utf8(&mut buf).as_bytes(); + writer.write_all(b) +} + +/// Writes `s` to the writer as a field enclosed in double quotes. +/// If `escape` is true, `s` is passed through [escape_bytes] first; if it is +/// false, `s` is written verbatim. Only pass false if `s` is known to only +/// contain characters that need no escaping. +pub(crate) fn write_field<S: AsRef<[u8]>>( + writer: &mut impl Write, + s: S, + escape: bool, +) -> io::Result<()> { + write_char(writer, QUOTE)?; + + if !escape { + writer.write_all(s.as_ref())?; + } else { + writer.write_all(&escape_bytes(s.as_ref()))?; + } + + write_char(writer, QUOTE)?; + + Ok(()) +} + +fn write_array_elements<S: AsRef<[u8]>>( + writer: &mut impl Write, + elements: &[S], +) -> Result<(), io::Error> { + for (index, element) in elements.iter().enumerate() { + if index > 0 { + write_char(writer, COMMA)?; + } + + write_field(writer, element, true)?; + } + + Ok(()) +} + +pub(crate) fn write_outputs( + writer: &mut impl Write, + outputs: &BTreeMap<String, Output>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + for (ii, (output_name, output)) in outputs.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + + let path_str = output.path_str(); + let mut elements: Vec<&str> = vec![output_name, &path_str]; + + let (mode_and_algo, digest) = match &output.ca_hash { + Some(ca_hash) => ( + format!("{}{}", ca_kind_prefix(ca_hash), ca_hash.hash().algo()), + data_encoding::HEXLOWER.encode(ca_hash.hash().digest_as_bytes()), + ), + None => ("".to_string(), "".to_string()), + }; + + elements.push(&mode_and_algo); + elements.push(&digest); + + write_array_elements(writer, &elements)?; + + write_char(writer, PAREN_CLOSE)?; + } + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub(crate) fn write_input_derivations( + writer: &mut impl Write, + input_derivations: 
&BTreeMap<impl AtermWriteable, BTreeSet<String>>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + + for (ii, (input_derivation_aterm, output_names)) in input_derivations.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + input_derivation_aterm.aterm_write(writer)?; + write_char(writer, COMMA)?; + + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &output_names + .iter() + .map(String::as_bytes) + .collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub(crate) fn write_input_sources( + writer: &mut impl Write, + input_sources: &BTreeSet<StorePath>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &input_sources + .iter() + .map(StorePath::to_absolute_path) + .collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub(crate) fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> { + write_field(writer, platform, true)?; + Ok(()) +} + +pub(crate) fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> { + write_field(writer, builder, true)?; + Ok(()) +} + +pub(crate) fn write_arguments( + writer: &mut impl Write, + arguments: &[String], +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &arguments + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub(crate) fn write_environment<E, K, V>( + writer: &mut impl Write, + environment: E, +) -> Result<(), io::Error> +where + E: IntoIterator<Item = (K, V)>, + K: AsRef<[u8]>, + V: AsRef<[u8]>, +{ + write_char(writer, BRACKET_OPEN)?; + + for (i, (k, v)) in environment.into_iter().enumerate() { + if i > 0 { + write_char(writer, COMMA)?; + } + + 
write_char(writer, PAREN_OPEN)?; + write_field(writer, k, false)?; + write_char(writer, COMMA)?; + write_field(writer, v, true)?; + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs new file mode 100644 index 0000000000..a71ede3eec --- /dev/null +++ b/tvix/nix-compat/src/lib.rs @@ -0,0 +1,18 @@ +pub(crate) mod aterm; +pub mod derivation; +pub mod nar; +pub mod narinfo; +pub mod nixbase32; +pub mod nixhash; +pub mod path_info; +pub mod store_path; + +#[cfg(feature = "wire")] +pub mod wire; + +#[cfg(feature = "wire")] +mod nix_daemon; +#[cfg(feature = "wire")] +pub use nix_daemon::worker_protocol; +#[cfg(feature = "wire")] +pub use nix_daemon::ProtocolVersion; diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs new file mode 100644 index 0000000000..c678d26ffb --- /dev/null +++ b/tvix/nix-compat/src/nar/mod.rs @@ -0,0 +1,4 @@ +pub(crate) mod wire; + +pub mod reader; +pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/async/mod.rs b/tvix/nix-compat/src/nar/reader/async/mod.rs new file mode 100644 index 0000000000..0808fba38c --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/mod.rs @@ -0,0 +1,173 @@ +use std::{ + mem::MaybeUninit, + pin::Pin, + task::{self, Poll}, +}; + +use tokio::io::{self, AsyncBufRead, AsyncRead, ErrorKind::InvalidData}; + +// Required reading for understanding this module. +use crate::{ + nar::{self, wire::PadPar}, + wire::{self, BytesReader}, +}; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn AsyncBufRead + Unpin + Send + 'a; + +/// Start reading a NAR file from `reader`. 
+pub async fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &nar::wire::TOK_NAR).await?; + Node::new(reader).await +} + +pub enum Node<'a, 'r: 'a> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r: 'a> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [BytesReader]. + async fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> { + Ok(match read::tag(reader).await? { + nar::wire::Node::Sym => { + let target = wire::read_bytes(reader, 1..=nar::wire::MAX_TARGET_LEN).await?; + + if target.contains(&0) { + return Err(InvalidData.into()); + } + + read::token(reader, &nar::wire::TOK_PAR).await?; + + Node::Symlink { target } + } + tag @ (nar::wire::Node::Reg | nar::wire::Node::Exe) => Node::File { + executable: tag == nar::wire::Node::Exe, + reader: FileReader { + inner: BytesReader::new_internal(reader, ..).await?, + }, + }, + nar::wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [AsyncRead] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. 
+pub struct FileReader<'a, 'r> { + inner: BytesReader<&'a mut Reader<'r>, PadPar>, +} + +impl<'a, 'r> FileReader<'a, 'r> { + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn len(&self) -> u64 { + self.inner.len() + } +} + +impl<'a, 'r> AsyncRead for FileReader<'a, 'r> { + fn poll_read( + self: Pin<&mut Self>, + cx: &mut task::Context, + buf: &mut io::ReadBuf, + ) -> Poll<io::Result<()>> { + Pin::new(&mut self.get_mut().inner).poll_read(cx, buf) + } +} + +impl<'a, 'r> AsyncBufRead for FileReader<'a, 'r> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + Pin::new(&mut self.get_mut().inner).poll_fill_buf(cx) + } + + fn consume(self: Pin<&mut Self>, amt: usize) { + Pin::new(&mut self.get_mut().inner).consume(amt) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: &'a mut Reader<'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: &'a mut Reader<'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. 
+ pub async fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + read::token(self.reader, &nar::wire::TOK_PAR).await?; + } + + if let nar::wire::Entry::None = read::tag(self.reader).await? { + return Ok(None); + } + + let mut name = [MaybeUninit::uninit(); nar::wire::MAX_NAME_LEN + 1]; + let name = + wire::read_bytes_buf(self.reader, &mut name, 1..=nar::wire::MAX_NAME_LEN).await?; + + if name.contains(&0) || name.contains(&b'/') || name == b"." || name == b".." { + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] >= name { + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + read::token(self.reader, &nar::wire::TOK_NOD).await?; + + Ok(Some(Entry { + name: &self.prev_name, + node: Node::new(self.reader).await?, + })) + } +} diff --git a/tvix/nix-compat/src/nar/reader/async/read.rs b/tvix/nix-compat/src/nar/reader/async/read.rs new file mode 100644 index 0000000000..2adf894922 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/read.rs @@ -0,0 +1,69 @@ +use tokio::io::{ + self, AsyncReadExt, + ErrorKind::{InvalidData, UnexpectedEof}, +}; + +use crate::nar::wire::Tag; + +use super::Reader; + +/// Consume a known token from the reader. +pub async fn token<const N: usize>(reader: &mut Reader<'_>, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [AsyncReadExt::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf).await? 
{ + 0 => { + return Err(UnexpectedEof.into()); + } + n => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. +pub async fn tag<T: Tag>(reader: &mut Reader<'_>) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN]).await?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest]).await?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/async/test.rs b/tvix/nix-compat/src/nar/reader/async/test.rs new file mode 100644 index 0000000000..7bc1f8942f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/async/test.rs @@ -0,0 +1,310 @@ +use tokio::io::AsyncReadExt; + +mod nar { + pub use crate::nar::reader::r#async as reader; +} + +#[tokio::test] +async fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/symlink.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let 
mut buf = vec![]; + reader + .read_to_end(&mut buf) + .await + .expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +async fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. 
+ let entry = subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry).await; + } + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. 
+ must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().await.expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[tokio::test] +#[should_panic] +#[ignore = "TODO: async poisoning"] +async fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../../tests/complicated.nar")); + let node = nar::reader::open(&mut f).await.unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .await + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. 
+ must_read_file( + ".keep", + subdir_reader + .next() + .await + .expect("next must succeed") + .expect("must be some"), + ) + .await; + + // we must read the None + assert!( + subdir_reader + .next() + .await + .expect("next must succeed") + .is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().await.expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. + dir_reader.next().await.unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +async fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).await.unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs new file mode 100644 index 0000000000..7e9143c8f3 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -0,0 +1,477 @@ +//! Parser for the Nix archive format, aka NAR. +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! a variety of things, including addressing fixed-output derivations +//! and transferring store paths between Nix stores. 
+ +use std::io::{ + self, BufRead, + ErrorKind::{InvalidData, UnexpectedEof}, + Read, Write, +}; + +#[cfg(not(debug_assertions))] +use std::marker::PhantomData; + +// Required reading for understanding this module. +use crate::nar::wire; + +#[cfg(all(feature = "async", feature = "wire"))] +pub mod r#async; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn BufRead + Send + 'a; + +struct ArchiveReader<'a, 'r> { + inner: &'a mut Reader<'r>, + + /// In debug mode, also track when we need to abandon this archive reader. + /// The archive reader must be abandoned when: + /// * An error is encountered at any point + /// * A file or directory reader is dropped before being read entirely. + /// All of these checks vanish in release mode. + status: ArchiveReaderStatus<'a>, +} + +macro_rules! try_or_poison { + ($it:expr, $ex:expr) => { + match $ex { + Ok(x) => x, + Err(e) => { + $it.status.poison(); + return Err(e.into()); + } + } + }; +} +/// Start reading a NAR file from `reader`. +pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &wire::TOK_NAR)?; + Node::new(ArchiveReader { + inner: reader, + status: ArchiveReaderStatus::top(), + }) +} + +pub enum Node<'a, 'r> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [FileReader]. + fn new(mut reader: ArchiveReader<'a, 'r>) -> io::Result<Self> { + Ok(match read::tag(reader.inner)? 
{ + wire::Node::Sym => { + let target = + try_or_poison!(reader, read::bytes(reader.inner, wire::MAX_TARGET_LEN)); + + if target.is_empty() || target.contains(&0) { + reader.status.poison(); + return Err(InvalidData.into()); + } + + try_or_poison!(reader, read::token(reader.inner, &wire::TOK_PAR)); + reader.status.ready_parent(); // Immediately allow reading from parent again + + Node::Symlink { target } + } + tag @ (wire::Node::Reg | wire::Node::Exe) => { + let len = try_or_poison!(&mut reader, read::u64(reader.inner)); + + Node::File { + executable: tag == wire::Node::Exe, + reader: FileReader::new(reader, len)?, + } + } + wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [Read] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +pub struct FileReader<'a, 'r> { + reader: ArchiveReader<'a, 'r>, + len: u64, + /// Truncated original file length for padding computation. + /// We only care about the 3 least significant bits; semantically, this is a u3. + pad: u8, +} + +impl<'a, 'r> FileReader<'a, 'r> { + /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. + /// We handle the terminating [wire::TOK_PAR] on semantic EOF. + fn new(mut reader: ArchiveReader<'a, 'r>, len: u64) -> io::Result<Self> { + // For zero-length files, we have to read the terminating TOK_PAR + // immediately, since FileReader::read may never be called; we've + // already reached semantic EOF by definition. 
+ if len == 0 { + read::token(reader.inner, &wire::TOK_PAR)?; + reader.status.ready_parent(); + } + + Ok(Self { + reader, + len, + pad: len as u8, + }) + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn len(&self) -> u64 { + self.len + } +} + +impl FileReader<'_, '_> { + /// Equivalent to [BufRead::fill_buf] + /// + /// We can't directly implement [BufRead], because [FileReader::consume] needs + /// to perform fallible I/O. + pub fn fill_buf(&mut self) -> io::Result<&[u8]> { + if self.is_empty() { + return Ok(&[]); + } + + self.reader.check_correct(); + + let mut buf = try_or_poison!(self.reader, self.reader.inner.fill_buf()); + + if buf.is_empty() { + self.reader.status.poison(); + return Err(UnexpectedEof.into()); + } + + if buf.len() as u64 > self.len { + buf = &buf[..self.len as usize]; + } + + Ok(buf) + } + + /// Analogous to [BufRead::consume], differing only in that it needs + /// to perform I/O in order to read padding and terminators. + pub fn consume(&mut self, n: usize) -> io::Result<()> { + if n == 0 { + return Ok(()); + } + + self.reader.check_correct(); + + self.len = self + .len + .checked_sub(n as u64) + .expect("consumed bytes past EOF"); + + self.reader.inner.consume(n); + + if self.is_empty() { + self.finish()?; + } + + Ok(()) + } + + /// Copy the (remaining) contents of the file into `dst`. 
+ pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> { + while !self.is_empty() { + let buf = self.fill_buf()?; + let n = try_or_poison!(self.reader, dst.write(buf)); + self.consume(n)?; + } + + Ok(()) + } +} + +impl Read for FileReader<'_, '_> { + fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> { + if buf.is_empty() || self.is_empty() { + return Ok(0); + } + + self.reader.check_correct(); + + if buf.len() as u64 > self.len { + buf = &mut buf[..self.len as usize]; + } + + let n = try_or_poison!(self.reader, self.reader.inner.read(buf)); + self.len -= n as u64; + + if n == 0 { + self.reader.status.poison(); + return Err(UnexpectedEof.into()); + } + + if self.is_empty() { + self.finish()?; + } + + Ok(n) + } +} + +impl FileReader<'_, '_> { + /// We've reached semantic EOF, consume and verify the padding and terminating TOK_PAR. + /// Files are padded to 64 bits (8 bytes), just like any other byte string in the wire format. + fn finish(&mut self) -> io::Result<()> { + let pad = (self.pad & 7) as usize; + + if pad != 0 { + let mut buf = [0; 8]; + try_or_poison!(self.reader, self.reader.inner.read_exact(&mut buf[pad..])); + + if buf != [0; 8] { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + } + + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); + + // Done with reading this file, allow going back up the chain of readers + self.reader.status.ready_parent(); + + Ok(()) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: ArchiveReader<'a, 'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. 
+ prev_name: Vec<u8>, +} + +pub struct Entry<'a, 'r> { + pub name: &'a [u8], + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: ArchiveReader<'a, 'r>) -> Self { + Self { + reader, + prev_name: vec![], + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> io::Result<Option<Entry<'_, 'r>>> { + self.reader.check_correct(); + + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if !self.prev_name.is_empty() { + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_PAR)); + } + + // Determine if there are more entries to follow + if let wire::Entry::None = try_or_poison!(self.reader, read::tag(self.reader.inner)) { + // We've reached the end of this directory. + self.reader.status.ready_parent(); + return Ok(None); + } + + let mut name = [0; wire::MAX_NAME_LEN + 1]; + let name = try_or_poison!( + self.reader, + read::bytes_buf(self.reader.inner, &mut name, wire::MAX_NAME_LEN) + ); + + if name.is_empty() + || name.contains(&0) + || name.contains(&b'/') + || name == b"." + || name == b".." + { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. + if &self.prev_name[..] 
>= name { + self.reader.status.poison(); + return Err(InvalidData.into()); + } + + self.prev_name.clear(); + self.prev_name.extend_from_slice(name); + + try_or_poison!(self.reader, read::token(self.reader.inner, &wire::TOK_NOD)); + + Ok(Some(Entry { + name: &self.prev_name, + // Don't need to worry about poisoning here: Node::new will do it for us if needed + node: Node::new(self.reader.child())?, + })) + } +} + +/// We use a stack of statuses to: +/// * Share poisoned state across all objects from the same underlying reader, +/// so we can check they are abandoned when an error occurs +/// * Make sure only the most recently created object is read from, and is fully exhausted +/// before anything it was created from is used again. +enum ArchiveReaderStatus<'a> { + #[cfg(not(debug_assertions))] + None(PhantomData<&'a ()>), + #[cfg(debug_assertions)] + StackTop { poisoned: bool, ready: bool }, + #[cfg(debug_assertions)] + StackChild { + poisoned: &'a mut bool, + parent_ready: &'a mut bool, + ready: bool, + }, +} + +impl ArchiveReaderStatus<'_> { + fn top() -> Self { + #[cfg(debug_assertions)] + { + ArchiveReaderStatus::StackTop { + poisoned: false, + ready: true, + } + } + + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(PhantomData) + } + + /// Poison all the objects sharing the same reader, to be used when an error occurs + fn poison(&mut self) { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned: x, .. } => *x = true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned: x, .. } => **x = true, + } + } + + /// Mark the parent as ready, allowing it to be used again and preventing this reference to the reader being used again. + fn ready_parent(&mut self) { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => {} + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. 
} => { + *ready = false; + } + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { + ready, + parent_ready, + .. + } => { + *ready = false; + **parent_ready = true; + } + }; + } + + fn poisoned(&self) -> bool { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => false, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { poisoned, .. } => *poisoned, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { poisoned, .. } => **poisoned, + } + } + + fn ready(&self) -> bool { + match self { + #[cfg(not(debug_assertions))] + ArchiveReaderStatus::None(_) => true, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackTop { ready, .. } => *ready, + #[cfg(debug_assertions)] + ArchiveReaderStatus::StackChild { ready, .. } => *ready, + } + } +} + +impl<'a, 'r> ArchiveReader<'a, 'r> { + /// Create a new child reader from this one. + /// In debug mode, this reader will panic if called before the new child is exhausted / calls `ready_parent` + fn child(&mut self) -> ArchiveReader<'_, 'r> { + ArchiveReader { + inner: self.inner, + #[cfg(not(debug_assertions))] + status: ArchiveReaderStatus::None(PhantomData), + #[cfg(debug_assertions)] + status: match &mut self.status { + ArchiveReaderStatus::StackTop { poisoned, ready } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + ArchiveReaderStatus::StackChild { + poisoned, ready, .. + } => { + *ready = false; + ArchiveReaderStatus::StackChild { + poisoned, + parent_ready: ready, + ready: true, + } + } + }, + } + } + + /// Check the reader is in the correct status. + /// Only does anything when debug assertions are on. + #[inline(always)] + fn check_correct(&self) { + assert!( + !self.status.poisoned(), + "Archive reader used after it was meant to be abandoned!" + ); + assert!( + self.status.ready(), + "Non-ready archive reader used! 
(Should've been reading from something else)" + ); + } +} diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs new file mode 100644 index 0000000000..9938581f2a --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -0,0 +1,141 @@ +//! Helpers for reading [crate::nar::wire] format. + +use std::io::{ + self, + ErrorKind::{Interrupted, InvalidData, UnexpectedEof}, +}; + +use super::Reader; +use crate::nar::wire::Tag; + +/// Consume a little-endian [prim@u64] from the reader. +pub fn u64(reader: &mut Reader) -> io::Result<u64> { + let mut buf = [0; 8]; + reader.read_exact(&mut buf)?; + Ok(u64::from_le_bytes(buf)) +} + +/// Consume a byte string from the reader into a provided buffer, +/// returning the data bytes. +pub fn bytes_buf<'a, const N: usize>( + reader: &mut Reader, + buf: &'a mut [u8; N], + max_len: usize, +) -> io::Result<&'a [u8]> { + assert_eq!(N % 8, 0); + assert!(max_len <= N); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + reader.read_exact(&mut buf[..buf_len])?; + + // verify that the padding is all zeroes + for &b in &buf[len..buf_len] { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(&buf[..len]) +} + +/// Consume a byte string of up to `max_len` bytes from the reader. 
+pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { + assert!(max_len <= isize::MAX as usize); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + let mut buf = vec![0; buf_len]; + reader.read_exact(&mut buf)?; + + // verify that the padding is all zeroes + for b in buf.drain(len..) { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(buf) +} + +/// Consume a known token from the reader. +pub fn token<const N: usize>(reader: &mut Reader, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [Read::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf) { + Ok(0) => { + return Err(UnexpectedEof.into()); + } + Ok(n) => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + Err(e) => { + if e.kind() != Interrupted { + return Err(e); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. 
+pub fn tag<T: Tag>(reader: &mut Reader) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN])?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest])?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs new file mode 100644 index 0000000000..63e4fb289f --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -0,0 +1,278 @@ +use std::io::Read; + +use crate::nar; + +#[test] +fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/symlink.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader.read_to_end(&mut buf).expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // 
first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + must_read_file(".keep", entry); + } + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn file_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + { + let entry = dir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(b".keep", entry.name); + // don't bother to finish reading it. + }; + + // this should panic (not return an error), because we are meant to abandon the archive reader now. 
+ assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_abandoned() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(_) => { + // don't finish using it, which poisons the archive reader + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // this should panic, because we didn't finish reading the child subdirectory + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} + +#[test] +#[should_panic] +fn dir_read_after_none() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. 
+ must_read_file( + ".keep", + dir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // second entry is aa, a symlink to /nix/store/somewhereelse + must_be_symlink( + "aa", + "/nix/store/somewhereelse", + dir_reader + .next() + .expect("next must be some") + .expect("must be some"), + ); + + { + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(b"keep", entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + must_read_file( + ".keep", + subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"), + ); + + // we must read the None + assert!( + subdir_reader.next().expect("next must succeed").is_none(), + "keep directory contains only .keep" + ); + } + _ => panic!("unexpected type for keep/.keep"), + } + }; + + // reading more entries yields None (and we actually must read until this) + assert!(dir_reader.next().expect("must succeed").is_none()); + + // this should panic, because we already got a none so we're meant to stop. 
+ dir_reader.next().unwrap(); + unreachable!() + } + _ => panic!("unexpected type"), + } +} + +fn must_read_file(name: &'static str, entry: nar::reader::Entry<'_, '_>) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).unwrap(), 0); + } + _ => panic!("unexpected type for {}", name), + } +} + +fn must_be_symlink( + name: &'static str, + exp_target: &'static str, + entry: nar::reader::Entry<'_, '_>, +) { + assert_eq!(name.as_bytes(), entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(exp_target.as_bytes(), &target); + } + _ => panic!("unexpected type for {}", name), + } +} diff --git a/tvix/nix-compat/src/nar/tests/complicated.nar b/tvix/nix-compat/src/nar/tests/complicated.nar new file mode 100644 index 0000000000..6a137f5fbb --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/complicated.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/helloworld.nar b/tvix/nix-compat/src/nar/tests/helloworld.nar new file mode 100644 index 0000000000..2e12681152 --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/helloworld.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/symlink.nar b/tvix/nix-compat/src/nar/tests/symlink.nar new file mode 100644 index 0000000000..7990e4ad5b --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/symlink.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs new file mode 100644 index 0000000000..9e99b530ce --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/mod.rs @@ -0,0 +1,150 @@ +//! NAR wire format, without I/O details, since those differ between +//! the synchronous and asynchronous implementations. +//! +//! The wire format is an S-expression format, encoded onto the wire +//! using simple encoding rules. +//! +//! # Encoding +//! +//! 
Lengths are represented as 64-bit unsigned integers in little-endian +//! format. Byte strings, including file contents and syntactic strings +//! part of the grammar, are prefixed by their 64-bit length, and padded +//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string +//! is therefore encoded as eight zero bytes representing its length. +//! +//! # Grammar +//! +//! The NAR grammar is as follows: +//! ```plain +//! archive ::= "nix-archive-1" node +//! +//! node ::= "(" "type" "symlink" "target" string ")" +//! ||= "(" "type" "regular" ("executable" "")? "contents" string ")" +//! ||= "(" "type" "directory" entry* ")" +//! +//! entry ::= "entry" "(" "name" string "node" node ")" +//! ``` +//! +//! We rewrite it to pull together the purely syntactic elements into +//! unified tokens, producing an equivalent grammar that can be parsed +//! and serialized more elegantly: +//! ```plain +//! archive ::= TOK_NAR node +//! node ::= TOK_SYM string TOK_PAR +//! ||= (TOK_REG | TOK_EXE) string TOK_PAR +//! ||= TOK_DIR entry* TOK_PAR +//! +//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR +//! +//! TOK_NAR ::= "nix-archive-1" "(" "type" +//! TOK_SYM ::= "symlink" "target" +//! TOK_REG ::= "regular" "contents" +//! TOK_EXE ::= "regular" "executable" "" +//! TOK_DIR ::= "directory" +//! TOK_ENT ::= "entry" "(" "name" +//! TOK_NOD ::= "node" "(" "type" +//! TOK_PAR ::= ")" +//! ``` +//! +//! # Restrictions +//! +//! NOTE: These restrictions are not (and cannot be) enforced by this module, +//! but must be enforced by its consumers, [super::reader] and [super::writer]. +//! +//! Directory entry names cannot have the reserved names `.` and `..`, nor contain +//! forward slashes. They must appear in strictly ascending lexicographic order +//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length. +//! +//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length. +//! +//! Neither is permitted to be empty, or contain null bytes. 
+ +// These values are the standard Linux length limits +/// Maximum length of a directory entry name +pub const MAX_NAME_LEN: usize = 255; +/// Maximum length of a symlink target +pub const MAX_TARGET_LEN: usize = 4095; + +#[cfg(test)] +fn token(xs: &[&str]) -> Vec<u8> { + let mut out = vec![]; + for x in xs { + let len = x.len() as u64; + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(x.as_bytes()); + + let n = x.len() & 7; + if n != 0 { + const ZERO: [u8; 8] = [0; 8]; + out.extend_from_slice(&ZERO[n..]); + } + } + out +} + +pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; +pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; +pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; +pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; +#[cfg(feature = "async")] +const TOK_PAD_PAR: [u8; 24] = *b"\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[cfg(feature = "async")] +#[derive(Debug)] +pub(crate) enum PadPar {} + +#[cfg(feature = "async")] +impl crate::wire::reader::Tag for PadPar { + const PATTERN: &'static [u8] = &TOK_PAD_PAR; + + type Buf = [u8; 24]; + + fn make_buf() -> Self::Buf { + [0; 24] + } +} + +#[test] +fn tokens() { + let cases: &[(&[u8], &[&str])] = &[ + (&TOK_NAR, &["nix-archive-1", "(", "type"]), + (&TOK_SYM, &["symlink", "target"]), + (&TOK_REG, &["regular", "contents"]), + 
(&TOK_EXE, &["regular", "executable", "", "contents"]), + (&TOK_DIR, &["directory"]), + (&TOK_ENT, &["entry", "(", "name"]), + (&TOK_NOD, &["node", "(", "type"]), + (&TOK_PAR, &[")"]), + ]; + + for &(tok, xs) in cases { + assert_eq!(tok, token(xs)); + } +} + +pub use tag::Tag; +mod tag; + +tag::make! { + /// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD], + /// and preceding the next variable-length element. + pub enum Node[16] { + Sym = TOK_SYM, + Reg = TOK_REG, + Exe = TOK_EXE, + Dir = TOK_DIR, + } + + /// Directory entry or terminator + pub enum Entry[0] { + /// End of directory + None = TOK_PAR, + /// Directory entry + /// Followed by a name string, [TOK_NOD], and a [Node]. + Some = TOK_ENT, + } +} diff --git a/tvix/nix-compat/src/nar/wire/tag.rs b/tvix/nix-compat/src/nar/wire/tag.rs new file mode 100644 index 0000000000..4982a0d707 --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/tag.rs @@ -0,0 +1,166 @@ +/// A type implementing Tag represents a static hash set of byte strings, +/// with a very simple perfect hash function: every element has a unique +/// discriminant at a common byte offset. The values of the type represent +/// the members by this single discriminant byte; they are indices into the +/// hash set. +pub trait Tag: Sized { + /// Discriminant offset + const OFF: usize; + /// Minimum variant length + const MIN: usize; + + /// Minimal suitably sized buffer for reading the wire representation + /// + /// HACK: This is a workaround for const generics limitations. + type Buf: AsMut<[u8]> + Send; + + /// Make an instance of [Self::Buf] + fn make_buf() -> Self::Buf; + + /// Convert a discriminant into the corresponding variant + fn from_u8(x: u8) -> Option<Self>; + + /// Convert a variant back into the wire representation + fn as_bytes(&self) -> &'static [u8]; +} + +/// Generate an enum implementing [Tag], enforcing at compile time that +/// the discriminant values are distinct. +macro_rules! 
make { + ( + $( + $(#[doc = $doc:expr])* + $vis:vis enum $Enum:ident[$off:expr] { + $( + $(#[doc = $var_doc:expr])* + $Var:ident = $TOK:ident, + )+ + } + )* + ) => { + $( + $(#[doc = $doc])* + #[derive(Debug, PartialEq, Eq)] + #[repr(u8)] + $vis enum $Enum { + $( + $(#[doc = $var_doc])* + $Var = $TOK[$Enum::OFF] + ),+ + } + + impl Tag for $Enum { + /// Discriminant offset + const OFF: usize = $off; + /// Minimum variant length + const MIN: usize = tag::min_of(&[$($TOK.len()),+]); + + /// Minimal suitably sized buffer for reading the wire representation + type Buf = [u8; tag::buf_of(&[$($TOK.len()),+])]; + + /// Make an instance of [Self::Buf] + #[inline(always)] + fn make_buf() -> Self::Buf { + [0u8; tag::buf_of(&[$($TOK.len()),+])] + } + + /// Convert a discriminant into the corresponding variant + #[inline(always)] + fn from_u8(x: u8) -> Option<Self> { + #[allow(non_upper_case_globals)] + mod __variant { + $( + pub const $Var: u8 = super::$Enum::$Var as u8; + )+ + } + + match x { + $(__variant::$Var => Some(Self::$Var),)+ + _ => None + } + } + + /// Convert a variant back into the wire representation + #[inline(always)] + fn as_bytes(&self) -> &'static [u8] { + match self { + $(Self::$Var => &$TOK,)+ + } + } + } + )* + }; +} + +// The following functions are written somewhat unusually, +// since they're const functions that cannot use iterators. + +/// Maximum element of a slice +const fn max_of(mut xs: &[usize]) -> usize { + let mut y = usize::MIN; + while let &[x, ref tail @ ..] = xs { + y = if x > y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum element of a slice +pub const fn min_of(mut xs: &[usize]) -> usize { + let mut y = usize::MAX; + while let &[x, ref tail @ ..] = xs { + y = if x < y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum buffer size to contain either of `0..Tag::MIN` and `Tag::MIN..` +/// at a particular time, for all possible tag wire representations, given +/// the sizes of all wire representations. 
+/// +/// # Example +/// +/// ```plain +/// OFF = 16 +/// MIN = 24 +/// MAX = 64 +/// +/// BUF = max(MIN, MAX-MIN) +/// = max(24, 64-24) +/// = max(24, 40) +/// = 40 +/// ``` +pub const fn buf_of(xs: &[usize]) -> usize { + max_of(&[min_of(xs), max_of(xs) - min_of(xs)]) +} + +pub(crate) use make; + +#[cfg(test)] +mod test { + use super::super::tag::{self, Tag}; + + const TOK_A: [u8; 3] = [0xed, 0xef, 0x1c]; + const TOK_B: [u8; 3] = [0xed, 0xf0, 0x1c]; + + const OFFSET: usize = 1; + + make! { + enum Token[OFFSET] { + A = TOK_A, + B = TOK_B, + } + } + + #[test] + fn example() { + assert_eq!(Token::from_u8(0xed), None); + + let tag = Token::from_u8(0xef).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_A[..]); + + let tag = Token::from_u8(0xf0).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_B[..]); + } +} diff --git a/tvix/nix-compat/src/nar/writer/async.rs b/tvix/nix-compat/src/nar/writer/async.rs new file mode 100644 index 0000000000..a2ce68fc3c --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/async.rs @@ -0,0 +1,235 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`AsyncWrite`], and content +//! can be read from any type that implements [`AsyncBufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # futures::executor::block_on(async { +//! # use tokio::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::r#async::open(&mut sink).await?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `AsyncBufRead`-implementing type. +//! +//! let executable = false; +//! 
let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader).await?; +//! # Ok::<(), std::io::Error>(()) +//! # }); +//! ``` + +use crate::nar::wire; +use std::{ + io::{ + self, + ErrorKind::{InvalidInput, UnexpectedEof}, + }, + pin::Pin, +}; +use tokio::io::{AsyncBufRead, AsyncBufReadExt, AsyncWrite, AsyncWriteExt}; + +/// Convenience type alias for types implementing [`AsyncWrite`]. +pub type Writer<'a> = dyn AsyncWrite + Unpin + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub async fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR).await?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + async fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data).await + } + + async fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]).await, + } + } + + /// Make this node a symlink. + pub async fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!(!target.is_empty(), "target is empty"); + debug_assert!(!target.contains(&0), "target contains null byte"); + + self.write(&wire::TOK_SYM).await?; + self.write(&target.len().to_le_bytes()).await?; + self.write(target).await?; + self.pad(target.len() as u64).await?; + self.write(&wire::TOK_PAR).await?; + Ok(()) + } + + /// Make this node a single file. 
+ pub async fn file( + mut self, + executable: bool, + size: u64, + reader: &mut (dyn AsyncBufRead + Unpin + Send), + ) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + }) + .await?; + + self.write(&size.to_le_bytes()).await?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf().await?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n]).await?; + + need -= n as u64; + Pin::new(&mut *reader).consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().await.map(|b| !b.is_empty())? { + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size).await?; + self.write(&wire::TOK_PAR).await?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub async fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR).await?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). 
+ /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. + pub async fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR).await?; + } + } + + self.node.write(&wire::TOK_ENT).await?; + self.node.write(&name.len().to_le_bytes()).await?; + self.node.write(name).await?; + self.node.pad(name.len() as u64).await?; + self.node.write(&wire::TOK_NOD).await?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. 
+ pub async fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR).await?; + } + + self.node.write(&wire::TOK_PAR).await?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/mod.rs b/tvix/nix-compat/src/nar/writer/mod.rs new file mode 100644 index 0000000000..fe8ccccb37 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/mod.rs @@ -0,0 +1,9 @@ +pub use sync::*; + +pub mod sync; + +#[cfg(test)] +mod test; + +#[cfg(feature = "async")] +pub mod r#async; diff --git a/tvix/nix-compat/src/nar/writer/sync.rs b/tvix/nix-compat/src/nar/writer/sync.rs new file mode 100644 index 0000000000..6270129028 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/sync.rs @@ -0,0 +1,224 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`Write`], and content +//! can be read from any type that implements [`BufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # use std::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::open(&mut sink)?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `BufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader)?; +//! # Ok::<(), std::io::Error>(()) +//! ``` + +use crate::nar::wire; +use std::io::{ + self, BufRead, + ErrorKind::{InvalidInput, UnexpectedEof}, + Write, +}; + +/// Convenience type alias for types implementing [`Write`]. 
+pub type Writer<'a> = dyn Write + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR)?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data) + } + + fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]), + } + } + + /// Make this node a symlink. + pub fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!(!target.is_empty(), "target is empty"); + debug_assert!(!target.contains(&0), "target contains null byte"); + + self.write(&wire::TOK_SYM)?; + self.write(&target.len().to_le_bytes())?; + self.write(target)?; + self.pad(target.len() as u64)?; + self.write(&wire::TOK_PAR)?; + Ok(()) + } + + /// Make this node a single file. + pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + })?; + + self.write(&size.to_le_bytes())?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf()?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n])?; + + need -= n as u64; + reader.consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().map(|b| !b.is_empty())? 
{ + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size)?; + self.write(&wire::TOK_PAR)?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR)?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. 
+ pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR)?; + } + } + + self.node.write(&wire::TOK_ENT)?; + self.node.write(&name.len().to_le_bytes())?; + self.node.write(name)?; + self.node.pad(name.len() as u64)?; + self.node.write(&wire::TOK_NOD)?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. 
+ pub fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR)?; + } + + self.node.write(&wire::TOK_PAR)?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/test.rs b/tvix/nix-compat/src/nar/writer/test.rs new file mode 100644 index 0000000000..d7f18a49af --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/test.rs @@ -0,0 +1,128 @@ +use crate::nar; + +#[test] +fn symlink() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + node.symlink("/nix/store/somewhereelse".as_bytes()).unwrap(); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn symlink_async() { + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + node.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .unwrap(); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[test] +fn file() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut std::io::Cursor::new(file_contents), + ) + .unwrap(); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn file_async() { + use std::io::Cursor; + + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut Cursor::new(file_contents), + ) + .await + .unwrap(); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[test] +fn complicated() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let mut dir_node = node.directory().unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).unwrap(); + e.file(false, 0, &mut 
std::io::Cursor::new([])) + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).unwrap(); + let mut subdir_node = e.directory().expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .expect("subdir entry must succeed"); + e_sub.file(false, 0, &mut std::io::Cursor::new([])).unwrap(); + + // close the subdir, and then the dir, which is required. + subdir_node.close().unwrap(); + dir_node.close().unwrap(); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[tokio::test] +async fn complicated_async() { + use std::io::Cursor; + + let mut buf = vec![]; + + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let mut dir_node = node.directory().await.unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).await.unwrap(); + e.file(false, 0, &mut Cursor::new([])) + .await + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).await.unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).await.unwrap(); + let mut subdir_node = e.directory().await.expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .await + .expect("subdir entry must succeed"); + e_sub.file(false, 0, &mut Cursor::new([])).await.unwrap(); + + // close the subdir, and then the dir, which is required. 
+ subdir_node.close().await.unwrap(); + dir_node.close().await.unwrap(); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} diff --git a/tvix/nix-compat/src/narinfo/fingerprint.rs b/tvix/nix-compat/src/narinfo/fingerprint.rs new file mode 100644 index 0000000000..3e02aca571 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/fingerprint.rs @@ -0,0 +1,50 @@ +use crate::{nixbase32, store_path::StorePathRef}; + +/// Computes the fingerprint string for certain fields in a [super::NarInfo]. +/// This fingerprint is signed by an ed25519 key, and in the case of a Nix HTTP +/// Binary cache, included in the NARInfo files served from there. +pub fn fingerprint<'a, R: Iterator<Item = &'a StorePathRef<'a>>>( + store_path: &StorePathRef, + nar_sha256: &[u8; 32], + nar_size: u64, + references: R, +) -> String { + format!( + "1;{};sha256:{};{};{}", + store_path.to_absolute_path(), + nixbase32::encode(nar_sha256), + nar_size, + // references are absolute paths, joined with `,`. 
+ references + .map(|r| r.to_absolute_path()) + .collect::<Vec<String>>() + .join(",") + ) +} + +#[cfg(test)] +mod tests { + use crate::narinfo::NarInfo; + + const NARINFO_STR: &str = r#"StorePath: /nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin +URL: nar/05ra3y72i3qjri7xskf9qj8kb29r6naqy1sqpbs3azi3xcigmj56.nar.xz +Compression: xz +FileHash: sha256:05ra3y72i3qjri7xskf9qj8kb29r6naqy1sqpbs3azi3xcigmj56 +FileSize: 68852 +NarHash: sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0 +NarSize: 196040 +References: 0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0 6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115 j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12 yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n +Deriver: 5rwxzi7pal3qhpsyfc16gzkh939q1np6-curl-7.82.0.drv +Sig: cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ== +Sig: test1:519iiVLx/c4Rdt5DNt6Y2Jm6hcWE9+XY69ygiWSZCNGVcmOcyL64uVAJ3cV8vaTusIZdbTnYo9Y7vDNeTmmMBQ== +"#; + + #[test] + fn fingerprint() { + let parsed = NarInfo::parse(NARINFO_STR).expect("must parse"); + assert_eq!( + "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n", + parsed.fingerprint() + ); + } +} diff --git a/tvix/nix-compat/src/narinfo/mod.rs b/tvix/nix-compat/src/narinfo/mod.rs new file mode 100644 index 0000000000..b1c10bceb2 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/mod.rs @@ -0,0 +1,527 @@ +//! NAR info files describe a store path in a traditional Nix binary cache. +//! Over the wire, they are formatted as "Key: value" pairs separated by newlines. +//! +//! It contains four kinds of information: +//! 1. the description of the store path itself +//! 
* store path prefix, digest, and name +//! * NAR hash and size +//! * references +//! 2. authenticity information +//! * zero or more signatures over that description +//! * an optional [CAHash] for content-addressed paths (fixed outputs, sources, and derivations) +//! 3. derivation metadata +//! * deriver (the derivation that produced this path) +//! * system (the system value of that derivation) +//! 4. cache-specific information +//! * URL of the compressed NAR, relative to the NAR info file +//! * compression algorithm used for the NAR +//! * hash and size of the compressed NAR + +use bitflags::bitflags; +use data_encoding::HEXLOWER; +use std::{ + fmt::{self, Display}, + mem, +}; + +use crate::{nixbase32, nixhash::CAHash, store_path::StorePathRef}; + +mod fingerprint; +mod public_keys; +mod signature; + +pub use fingerprint::fingerprint; + +pub use public_keys::{Error as PubKeyError, PubKey}; +pub use signature::{Error as SignatureError, Signature}; + +#[derive(Debug)] +pub struct NarInfo<'a> { + pub flags: Flags, + // core (authenticated, but unverified here) + /// Store path described by this [NarInfo] + pub store_path: StorePathRef<'a>, + /// SHA-256 digest of the NAR file + pub nar_hash: [u8; 32], + /// Size of the NAR file in bytes + pub nar_size: u64, + /// Store paths known to be referenced by the contents + pub references: Vec<StorePathRef<'a>>, + // authenticity + /// Ed25519 signature over the path fingerprint + pub signatures: Vec<Signature<'a>>, + /// Content address (for content-defined paths) + pub ca: Option<CAHash>, + // derivation metadata + /// Nix system triple of [NarInfo::deriver] + pub system: Option<&'a str>, + /// Store path of the derivation that produced this. The last .drv suffix is stripped. + pub deriver: Option<StorePathRef<'a>>, + // cache-specific untrusted metadata + /// Relative URL of the compressed NAR file + pub url: &'a str, + /// Compression method of the NAR file + /// `None` means `Compression: none`. 
+ /// + /// Nix interprets a missing `Compression` field as `Some("bzip2")`, + /// so we do as well. We haven't found any examples of this in the + /// wild, not even in the cache.nixos.org dataset. + pub compression: Option<&'a str>, + /// SHA-256 digest of the file at `url` + pub file_hash: Option<[u8; 32]>, + /// Size of the file at `url` in bytes + pub file_size: Option<u64>, +} + +bitflags! { + /// TODO(edef): be conscious of these when roundtripping + #[derive(Debug, Copy, Clone)] + pub struct Flags: u8 { + const UNKNOWN_FIELD = 1 << 0; + const COMPRESSION_DEFAULT = 1 << 1; + // Format quirks encountered in the cache.nixos.org dataset + const REFERENCES_OUT_OF_ORDER = 1 << 2; + const NAR_HASH_HEX = 1 << 3; + } +} + +impl<'a> NarInfo<'a> { + pub fn parse(input: &'a str) -> Result<Self, Error> { + let mut flags = Flags::empty(); + let mut store_path = None; + let mut url = None; + let mut compression = None; + let mut file_hash = None; + let mut file_size = None; + let mut nar_hash = None; + let mut nar_size = None; + let mut references = None; + let mut system = None; + let mut deriver = None; + let mut signatures = vec![]; + let mut ca = None; + + for line in input.lines() { + let (tag, val) = line + .split_once(':') + .ok_or_else(|| Error::InvalidLine(line.to_string()))?; + + let val = val + .strip_prefix(' ') + .ok_or_else(|| Error::InvalidLine(line.to_string()))?; + + match tag { + "StorePath" => { + let val = val + .strip_prefix("/nix/store/") + .ok_or(Error::InvalidStorePath( + crate::store_path::Error::MissingStoreDir, + ))?; + let val = StorePathRef::from_bytes(val.as_bytes()) + .map_err(Error::InvalidStorePath)?; + + if store_path.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "URL" => { + if val.is_empty() { + return Err(Error::EmptyField(tag.to_string())); + } + + if url.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "Compression" => { + if val.is_empty() { + return 
Err(Error::EmptyField(tag.to_string())); + } + + if compression.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "FileHash" => { + let val = val + .strip_prefix("sha256:") + .ok_or_else(|| Error::MissingPrefixForHash(tag.to_string()))?; + let val = nixbase32::decode_fixed::<32>(val) + .map_err(|e| Error::UnableToDecodeHash(tag.to_string(), e))?; + + if file_hash.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "FileSize" => { + let val = val + .parse::<u64>() + .map_err(|_| Error::UnableToParseSize(tag.to_string(), val.to_string()))?; + + if file_size.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "NarHash" => { + let val = val + .strip_prefix("sha256:") + .ok_or_else(|| Error::MissingPrefixForHash(tag.to_string()))?; + + let val = if val.len() != HEXLOWER.encode_len(32) { + nixbase32::decode_fixed::<32>(val) + } else { + flags |= Flags::NAR_HASH_HEX; + + let val = val.as_bytes(); + let mut buf = [0u8; 32]; + + HEXLOWER + .decode_mut(val, &mut buf) + .map_err(|e| e.error) + .map(|_| buf) + }; + + let val = val.map_err(|e| Error::UnableToDecodeHash(tag.to_string(), e))?; + + if nar_hash.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "NarSize" => { + let val = val + .parse::<u64>() + .map_err(|_| Error::UnableToParseSize(tag.to_string(), val.to_string()))?; + + if nar_size.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "References" => { + let val: Vec<StorePathRef> = if !val.is_empty() { + let mut prev = ""; + val.split(' ') + .enumerate() + .map(|(i, s)| { + // TODO(edef): track *duplicates* if this occurs + if mem::replace(&mut prev, s) >= s { + flags |= Flags::REFERENCES_OUT_OF_ORDER; + } + + StorePathRef::from_bytes(s.as_bytes()) + .map_err(|err| Error::InvalidReference(i, err)) + }) + .collect::<Result<_, _>>()? 
+ } else { + vec![] + }; + + if references.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "System" => { + if val.is_empty() { + return Err(Error::EmptyField(tag.to_string())); + } + + if system.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "Deriver" => { + match val.strip_suffix(".drv") { + Some(val) => { + let val = StorePathRef::from_bytes(val.as_bytes()) + .map_err(Error::InvalidDeriverStorePath)?; + + if deriver.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + None => { + return Err(Error::InvalidDeriverStorePathMissingSuffix); + } + }; + } + "Sig" => { + let val = Signature::parse(val) + .map_err(|e| Error::UnableToParseSignature(signatures.len(), e))?; + + signatures.push(val); + } + "CA" => { + let val = CAHash::from_nix_hex_str(val) + .ok_or_else(|| Error::UnableToParseCA(val.to_string()))?; + + if ca.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + _ => { + flags |= Flags::UNKNOWN_FIELD; + } + } + } + + Ok(NarInfo { + store_path: store_path.ok_or(Error::MissingField("StorePath"))?, + nar_hash: nar_hash.ok_or(Error::MissingField("NarHash"))?, + nar_size: nar_size.ok_or(Error::MissingField("NarSize"))?, + references: references.ok_or(Error::MissingField("References"))?, + signatures, + ca, + system, + deriver, + url: url.ok_or(Error::MissingField("URL"))?, + compression: match compression { + Some("none") => None, + None => { + flags |= Flags::COMPRESSION_DEFAULT; + Some("bzip2") + } + _ => compression, + }, + file_hash, + file_size, + flags, + }) + } + + /// Computes the fingerprint string for certain fields in this [NarInfo]. + /// This fingerprint is signed in [self.signatures]. 
+ pub fn fingerprint(&self) -> String { + fingerprint( + &self.store_path, + &self.nar_hash, + self.nar_size, + self.references.iter(), + ) + } +} + +impl Display for NarInfo<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + writeln!(w, "StorePath: /nix/store/{}", self.store_path)?; + writeln!(w, "URL: {}", self.url)?; + + if let Some(compression) = self.compression { + writeln!(w, "Compression: {compression}")?; + } + + if let Some(file_hash) = self.file_hash { + writeln!(w, "FileHash: sha256:{}", nixbase32::encode(&file_hash),)?; + } + + if let Some(file_size) = self.file_size { + writeln!(w, "FileSize: {file_size}")?; + } + + writeln!(w, "NarHash: sha256:{}", nixbase32::encode(&self.nar_hash),)?; + writeln!(w, "NarSize: {}", self.nar_size)?; + + write!(w, "References:")?; + if self.references.is_empty() { + write!(w, " ")?; + } else { + for path in &self.references { + write!(w, " {path}")?; + } + } + writeln!(w)?; + + if let Some(deriver) = &self.deriver { + writeln!(w, "Deriver: {deriver}.drv")?; + } + + if let Some(system) = self.system { + writeln!(w, "System: {system}")?; + } + + for sig in &self.signatures { + writeln!(w, "Sig: {sig}")?; + } + + if let Some(ca) = &self.ca { + writeln!(w, "CA: {}", ca.to_nix_nixbase32_string())?; + } + + Ok(()) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("duplicate field: {0}")] + DuplicateField(String), + + #[error("missing field: {0}")] + MissingField(&'static str), + + #[error("invalid line: {0}")] + InvalidLine(String), + + #[error("invalid StorePath: {0}")] + InvalidStorePath(crate::store_path::Error), + + #[error("field {0} may not be empty string")] + EmptyField(String), + + #[error("invalid {0}: {1}")] + UnableToParseSize(String, String), + + #[error("unable to parse #{0} reference: {1}")] + InvalidReference(usize, crate::store_path::Error), + + #[error("invalid Deriver store path: {0}")] + InvalidDeriverStorePath(crate::store_path::Error), + + #[error("invalid Deriver 
store path, must end with .drv")] + InvalidDeriverStorePathMissingSuffix, + + #[error("missing prefix for {0}")] + MissingPrefixForHash(String), + + #[error("unable to decode {0}: {1}")] + UnableToDecodeHash(String, data_encoding::DecodeError), + + #[error("unable to parse signature #{0}: {1}")] + UnableToParseSignature(usize, SignatureError), + + #[error("unable to parse CA field: {0}")] + UnableToParseCA(String), +} + +#[cfg(test)] +mod test { + use hex_literal::hex; + use lazy_static::lazy_static; + use pretty_assertions::assert_eq; + use std::{io, str}; + + use crate::{ + nixhash::{CAHash, NixHash}, + store_path::StorePathRef, + }; + + use super::{Flags, NarInfo}; + + lazy_static! { + static ref CASES: &'static [&'static str] = { + let data = zstd::decode_all(io::Cursor::new(include_bytes!( + "../../testdata/narinfo.zst" + ))) + .unwrap(); + let data = str::from_utf8(Vec::leak(data)).unwrap(); + Vec::leak( + data.split_inclusive("\n\n") + .map(|s| s.strip_suffix('\n').unwrap()) + .collect::<Vec<_>>(), + ) + }; + } + + #[test] + fn roundtrip() { + for &input in *CASES { + let parsed = NarInfo::parse(input).expect("should parse"); + let output = format!("{parsed}"); + assert_eq!(input, output, "should roundtrip"); + } + } + + #[test] + fn references_out_of_order() { + let parsed = NarInfo::parse( + r#"StorePath: /nix/store/xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6 +URL: nar/1hr09cgkyw1hcsfkv5qp5jlpmf2mqrkrqs3xj5zklq9c1h9544ff.nar.bz2 +Compression: bzip2 +FileHash: sha256:1hr09cgkyw1hcsfkv5qp5jlpmf2mqrkrqs3xj5zklq9c1h9544ff +FileSize: 4006 +NarHash: sha256:0ik9mpqxpd9hv325hdblj2nawqj5w7951qdyy8ikxgwr6fq7m11c +NarSize: 21264 +References: a8922c0h87iilxzzvwn2hmv8x210aqb9-glibc-2.7 7w2acjgalb0cm7b3bg8yswza4l7iil9y-binutils-2.18 mm631h09mj964hm9q04l5fd8vw12j1mm-bash-3.2-p39 nx2zs2qd6snfcpzw4a0jnh26z9m0yihz-gcc-3.4.6 xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6 +Deriver: 2dzpn70c1hawczwhg9aavqk18zp9zsva-gcc-3.4.6.drv +Sig: 
cache.nixos.org-1:o1DTsjCz0PofLJ216P2RBuSulI8BAb6zHxWE4N+tzlcELk5Uk/GO2SCxWTRN5wJutLZZ+cHTMdWqOHF88KGQDg== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::REFERENCES_OUT_OF_ORDER)); + assert_eq!( + vec![ + "a8922c0h87iilxzzvwn2hmv8x210aqb9-glibc-2.7", + "7w2acjgalb0cm7b3bg8yswza4l7iil9y-binutils-2.18", + "mm631h09mj964hm9q04l5fd8vw12j1mm-bash-3.2-p39", + "nx2zs2qd6snfcpzw4a0jnh26z9m0yihz-gcc-3.4.6", + "xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6" + ], + parsed + .references + .iter() + .map(StorePathRef::to_string) + .collect::<Vec<_>>(), + ); + } + + #[test] + fn ca_nar_hash_sha1() { + let parsed = NarInfo::parse( + r#"StorePath: /nix/store/k20pahypzvr49fy82cw5sx72hdfg3qcr-texlive-hyphenex-37354 +URL: nar/0i5biw0g01514llhfswxy6xfav8lxxdq1xg6ik7hgsqbpw0f06yi.nar.xz +Compression: xz +FileHash: sha256:0i5biw0g01514llhfswxy6xfav8lxxdq1xg6ik7hgsqbpw0f06yi +FileSize: 7120 +NarHash: sha256:0h1bm4sj1cnfkxgyhvgi8df1qavnnv94sd0v09wcrm971602shfg +NarSize: 22552 +References: +Sig: cache.nixos.org-1:u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw== +CA: fixed:r:sha1:1ak1ymbmsfx7z8kh09jzkr3a4dvkrfjw +"#).expect("should parse"); + + assert_eq!( + parsed.ca, + Some(CAHash::Nar(NixHash::Sha1(hex!( + "5cba3c77236ae4f9650270a27fbad375551fa60a" + )))) + ); + } + + #[test] + fn compression_default() { + // This doesn't exist as such in cache.nixos.org. + // We explicitly removed the compression field for the sake of this test. 
+ let parsed = NarInfo::parse(r#"StorePath: /nix/store/a1jjalr4csx9hcga7fnm122aqabrjnch-digikam-2.6.0 +URL: nar/1fzimfnvq2k8b40n4g54abmncpx2ddckh6qlb77pgq6xiysyil69.nar.bz2 +FileHash: sha256:1fzimfnvq2k8b40n4g54abmncpx2ddckh6qlb77pgq6xiysyil69 +FileSize: 43503778 +NarHash: sha256:0zpbbwipqzr5p8mlpag9wrsp5hlaxkq7gax5jj0hg3vvdziypcw5 +NarSize: 100658640 +References: 0izkyk7bq2ag9393nvnhgm87p75cq09w-liblqr-1-0.4.1 1cslpgyb7vb30inj3210jv6agqv42jxz-qca-2.0.3 1sya3bwjxkzpkmwn67gfzp4gz4g62l36-libXrandr-1.3.1 26yxdaa9z0ma5sgw02i670rsqnl57crs-glib-2.30.3 27lnjh99236kmhbpc5747599zcymfzmg-qt-4.8.2 2v6x378vcfvyxilkvihs60zha54z2x2y-qjson-0.7.1 45hgr3fbnr45n795hn2x7hsymp0h2j2m-libjpeg-8c 4kw1b212s80ap2iyibxrimcqb5imhfj7-libkexiv2-4.7.4 7dvylm5crlc0sfafcc0n46mb5ch67q0j-glibc-2.13 a05cbh1awjbl1rbyb2ynyf4k42v5a9a7-boost-1.47.0 a1jjalr4csx9hcga7fnm122aqabrjnch-digikam-2.6.0 aav5ffg8wlnilgnvdb2jnrv2aam4zmmz-perl-5.14.2 ab0m9h30nsr13w48qriv0k350kmwx567-kdelibs-4.7.4 avffkd49cqvpwdkzry8bn69dkbw4cy29-lensfun-0.2.5 cy8rl8h4yp2j3h8987vkklg328q3wmjz-gcc-4.6.3 dmmh5ihyg1r2dm4azgsfj2kprj92czlg-libSM-1.2.0 fl56j5n4shfw9c0r6vs2i4f1h9zx5kac-soprano-2.7.6 g15cmvh15ggdjcwapskngv20q4yhix40-jasper-1.900.1 i04maxd0din6v92rnqcwl9yra0kl2vk5-marble-4.7.4 kqjjb3m26rdddwwwkk8v45821aps877k-libICE-1.0.7 lxz9r135wkndvi642z4bjgmvyypsgirb-libtiff-3.9.4 m9c8i0a6cl30lcqp654dqkbag3wjmd00-libX11-1.4.1 mpnj4k2ijrgyfkh48fg96nzcmklfh5pl-coreutils-8.15 nppljblap477s0893c151lyq7r7n5v1q-zlib-1.2.7 nw9mdbyp8kyn3v4vkdzq0gsnqbc4mnx3-expat-2.0.1 p1a0dn931mzdkvj6h5yzshbmgxba5r0z-libgphoto2-2.4.11 pvjj07xa1cfkad3gwk376nzdrgknbcqm-mesa-7.11.2 pzcxag98jqccp9ycbxknyh0w95pgnsk4-lcms-1.19 qfi5pgds33kg6vlnxsmj0hyl74vcmyiz-libpng-1.5.10 scm6bj86s3qh3s3x0b9ayjp6755p4q86-mysql-5.1.54 sd23qspcyg385va0lr35xgz3hvlqphg6-libkipi-4.7.4 svmbrhc6kzfzakv20a7zrfl6kbr5mfpq-kdepimlibs-4.7.4 v7kh3h7xfwjz4hgffg3gwrfzjff9bw9d-bash-4.2-p24 vi17f22064djgpk0w248da348q8gxkww-libkdcraw-4.7.4 wkjdzmj3z4dcbsc9f833zs6krdgg2krk-phonon-4.6.0 
xf3i3awqi0035ixy2qyb6hk4c92r3vrn-opencv-2.4.2 y1vr0nz8i59x59501020nh2k1dw3bhwq-libusb-0.1.12 yf3hin2hb6i08n7zrk8g3acy54rhg9bp-libXext-1.2.0 +Deriver: la77dr44phk5m5jnl4dvk01cwpykyw9s-digikam-2.6.0.drv +System: i686-linux +Sig: cache.nixos.org-1:92fl0i5q7EyegCj5Yf4L0bENkWuVAtgveiRcTEEUH0P6HvCE1xFcPbz/0Pf6Np+K1LPzHK+s5RHOmVoxRsvsDg== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::COMPRESSION_DEFAULT)); + assert_eq!(parsed.compression, Some("bzip2")); + } + + #[test] + fn nar_hash_hex() { + let parsed = NarInfo::parse(r#"StorePath: /nix/store/0vpqfxbkx0ffrnhbws6g9qwhmliksz7f-perl-HTTP-Cookies-6.01 +URL: nar/1rv1m9inydm1r4krw8hmwg1hs86d0nxddd1pbhihx7l7fycjvfk3.nar.xz +Compression: xz +FileHash: sha256:1rv1m9inydm1r4krw8hmwg1hs86d0nxddd1pbhihx7l7fycjvfk3 +FileSize: 19912 +NarHash: sha256:60adfd293a4d81ad7cd7e47263cbb3fc846309ef91b154a08ba672b558f94ff3 +NarSize: 45840 +References: 0vpqfxbkx0ffrnhbws6g9qwhmliksz7f-perl-HTTP-Cookies-6.01 9vrhbib2lxd9pjlg6fnl5b82gblidrcr-perl-HTTP-Message-6.06 wy20zslqxzxxfpzzk0rajh41d7a6mlnf-perl-HTTP-Date-6.02 +Deriver: fb4ihlq3psnsjq95mvvs49rwpplpc8zj-perl-HTTP-Cookies-6.01.drv +Sig: cache.nixos.org-1:HhaiY36Uk3XV1JGe9d9xHnzAapqJXprU1YZZzSzxE97jCuO5RR7vlG2kF7MSC5thwRyxAtdghdSz3AqFi+QSCw== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::NAR_HASH_HEX)); + assert_eq!( + hex!("60adfd293a4d81ad7cd7e47263cbb3fc846309ef91b154a08ba672b558f94ff3"), + parsed.nar_hash, + ); + } +} diff --git a/tvix/nix-compat/src/narinfo/public_keys.rs b/tvix/nix-compat/src/narinfo/public_keys.rs new file mode 100644 index 0000000000..27dd90e096 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/public_keys.rs @@ -0,0 +1,152 @@ +//! This module defines data structures and parsers for the public key format +//! used inside Nix to verify signatures on .narinfo files. 
+ +use std::fmt::Display; + +use data_encoding::BASE64; +use ed25519_dalek::{VerifyingKey, PUBLIC_KEY_LENGTH}; + +use super::Signature; + +/// This represents a ed25519 public key and "name". +/// These are normally passed in the `trusted-public-keys` Nix config option, +/// and consist of a name and base64-encoded ed25519 pubkey, separated by a `:`. +#[derive(Debug)] +pub struct PubKey { + name: String, + verifying_key: VerifyingKey, +} + +impl PubKey { + pub fn new(name: String, verifying_key: VerifyingKey) -> Self { + Self { + name, + verifying_key, + } + } + + pub fn parse(input: &str) -> Result<Self, Error> { + let (name, bytes64) = input.split_once(':').ok_or(Error::MissingSeparator)?; + + if name.is_empty() + || !name + .chars() + .all(|c| char::is_alphanumeric(c) || c == '-' || c == '.') + { + return Err(Error::InvalidName(name.to_string())); + } + + if bytes64.len() != BASE64.encode_len(PUBLIC_KEY_LENGTH) { + return Err(Error::InvalidPubKeyLen(bytes64.len())); + } + + let mut buf = [0; PUBLIC_KEY_LENGTH + 1]; + let mut bytes = [0; PUBLIC_KEY_LENGTH]; + match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) { + Ok(PUBLIC_KEY_LENGTH) => { + bytes.copy_from_slice(&buf[..PUBLIC_KEY_LENGTH]); + } + Ok(_) => unreachable!(), + // keeping DecodePartial gets annoying lifetime-wise + Err(_) => return Err(Error::DecodeError(input.to_string())), + } + + let verifying_key = VerifyingKey::from_bytes(&bytes).map_err(Error::InvalidVerifyingKey)?; + + Ok(Self { + name: name.to_string(), + verifying_key, + }) + } + + pub fn name(&self) -> &str { + &self.name + } + + /// Verify the passed in signature is a correct signature for the passed in fingerprint and is signed + /// by the key material referred to by [Self], + /// which means the name in the signature has to match, + /// and the signature bytes themselves need to be a valid signature made by + /// the signing key identified by [Self::verifying key]. 
+ pub fn verify(&self, fingerprint: &str, signature: &Signature) -> bool { + if self.name() != signature.name() { + return false; + } + + return signature.verify(fingerprint.as_bytes(), &self.verifying_key); + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Invalid name: {0}")] + InvalidName(String), + #[error("Missing separator")] + MissingSeparator, + #[error("Invalid pubkey len: {0}")] + InvalidPubKeyLen(usize), + #[error("VerifyingKey error: {0}")] + InvalidVerifyingKey(ed25519_dalek::SignatureError), + #[error("Unable to base64-decode pubkey: {0}")] + DecodeError(String), +} + +impl Display for PubKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}:{}", + self.name, + BASE64.encode(self.verifying_key.as_bytes()) + ) + } +} + +#[cfg(test)] +mod test { + use data_encoding::BASE64; + use ed25519_dalek::PUBLIC_KEY_LENGTH; + use rstest::rstest; + + use crate::narinfo::Signature; + + use super::PubKey; + const FINGERPRINT: &str = "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n"; + + #[rstest] + #[case::cache_nixos_org("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", "cache.nixos.org-1", &BASE64.decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=").unwrap()[..].try_into().unwrap())] + #[case::cache_nixos_org_different_name("cheesecake:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", "cheesecake", &BASE64.decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=").unwrap()[..].try_into().unwrap())] + #[case::test_1("test1:tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=", "test1", &BASE64.decode(b"tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=").unwrap()[..].try_into().unwrap())] + fn 
parse( + #[case] input: &'static str, + #[case] exp_name: &'static str, + #[case] exp_verifying_key_bytes: &[u8; PUBLIC_KEY_LENGTH], + ) { + let pubkey = PubKey::parse(input).expect("must parse"); + assert_eq!(exp_name, pubkey.name()); + assert_eq!(exp_verifying_key_bytes, pubkey.verifying_key.as_bytes()); + } + + #[rstest] + #[case::empty_name("6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=")] + #[case::missing_padding("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY")] + #[case::wrong_length("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDS")] + fn parse_fail(#[case] input: &'static str) { + PubKey::parse(input).expect_err("must fail"); + } + + #[rstest] + #[case::correct_cache_nixos_org("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", FINGERPRINT, "cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", true)] + #[case::wrong_name_mismatch("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", FINGERPRINT, "cache.nixos.org:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", false)] + fn verify( + #[case] pubkey_str: &'static str, + #[case] fingerprint: &'static str, + #[case] signature_str: &'static str, + #[case] expected: bool, + ) { + let pubkey = PubKey::parse(pubkey_str).expect("must parse"); + let signature = Signature::parse(signature_str).expect("must parse"); + + assert_eq!(expected, pubkey.verify(fingerprint, &signature)); + } +} diff --git a/tvix/nix-compat/src/narinfo/signature.rs b/tvix/nix-compat/src/narinfo/signature.rs new file mode 100644 index 0000000000..fd197e771d --- /dev/null +++ b/tvix/nix-compat/src/narinfo/signature.rs @@ -0,0 +1,184 @@ +use std::fmt::{self, Display}; + +use data_encoding::BASE64; +use ed25519_dalek::SIGNATURE_LENGTH; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Signature<'a> { + name: &'a str, + bytes: [u8; SIGNATURE_LENGTH], +} + 
impl<'a> Signature<'a> {
    /// Builds a [Signature] from a key name and raw signature bytes.
    pub fn new(name: &'a str, bytes: [u8; SIGNATURE_LENGTH]) -> Self {
        Self { name, bytes }
    }

    /// Parses the `name:base64` representation of a signature.
    ///
    /// The name must be non-empty and consist only of alphanumeric
    /// characters, `-` and `.`. The part after the first `:` must be the
    /// padded base64 encoding of exactly [SIGNATURE_LENGTH] bytes.
    ///
    /// # Errors
    ///
    /// Returns an [Error] describing the first check that failed. Input with
    /// the right encoded length that does not decode to exactly
    /// [SIGNATURE_LENGTH] bytes is reported as [Error::DecodeError].
    pub fn parse(input: &'a str) -> Result<Self, Error> {
        let (name, bytes64) = input.split_once(':').ok_or(Error::MissingSeparator)?;

        if name.is_empty()
            || !name
                .chars()
                .all(|c| char::is_alphanumeric(c) || c == '-' || c == '.')
        {
            return Err(Error::InvalidName(name.to_string()));
        }

        if bytes64.len() != BASE64.encode_len(SIGNATURE_LENGTH) {
            return Err(Error::InvalidSignatureLen(bytes64.len()));
        }

        // The scratch buffer is two bytes larger than the signature:
        // `decode_mut` is handed room for the maximum decoded size of the
        // input and reports how many bytes it actually produced.
        let mut bytes = [0; SIGNATURE_LENGTH];
        let mut buf = [0; SIGNATURE_LENGTH + 2];
        match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) {
            Ok(SIGNATURE_LENGTH) => bytes.copy_from_slice(&buf[..SIGNATURE_LENGTH]),
            // The encoded length alone does not pin the decoded length (the
            // amount of padding is attacker-controlled), so any other byte
            // count is a malformed signature, not an impossible state.
            // keeping DecodePartial gets annoying lifetime-wise
            Ok(_) | Err(_) => return Err(Error::DecodeError(input.to_string())),
        }

        Ok(Signature { name, bytes })
    }

    /// The name of the key this signature claims to be made by.
    pub fn name(&self) -> &'a str {
        self.name
    }

    /// The raw signature bytes.
    pub fn bytes(&self) -> &[u8; SIGNATURE_LENGTH] {
        &self.bytes
    }

    /// For a given fingerprint and ed25519 verifying key, check whether the signature is valid.
    ///
    /// Only the signature bytes are checked; the name is not compared here.
    pub fn verify(&self, fingerprint: &[u8], verifying_key: &ed25519_dalek::VerifyingKey) -> bool {
        let signature = ed25519_dalek::Signature::from_bytes(self.bytes());

        verifying_key.verify_strict(fingerprint, &signature).is_ok()
    }
}

impl<'de: 'a, 'a> Deserialize<'de> for Signature<'a> {
    /// Deserializes from a borrowed string in the `name:base64` form.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let str: &'de str = Deserialize::deserialize(deserializer)?;
        Self::parse(str).map_err(|_| {
            serde::de::Error::invalid_value(serde::de::Unexpected::Str(str), &"Signature")
        })
    }
}

impl<'a> Serialize for Signature<'a> {
    /// Serializes as the `name:base64` string produced by the [Display] impl.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Lets the serializer consume the Display output directly instead of
        // going through an intermediate String here.
        serializer.collect_str(self)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("Invalid name: {0}")]
    InvalidName(String),
    #[error("Missing separator")]
    MissingSeparator,
    #[error("Invalid signature len: (expected {} b64-encoded, got {})", BASE64.encode_len(SIGNATURE_LENGTH), .0)]
    InvalidSignatureLen(usize),
    #[error("Unable to base64-decode signature: {0}")]
    DecodeError(String),
}

impl Display for Signature<'_> {
    /// Formats the signature as `name:base64`, the same shape [Signature::parse] accepts.
    fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
        write!(w, "{}:{}", self.name, BASE64.encode(&self.bytes))
    }
}

#[cfg(test)]
mod test {
    use data_encoding::BASE64;
    use ed25519_dalek::VerifyingKey;
    use hex_literal::hex;
    use lazy_static::lazy_static;

    use super::Signature;
    use rstest::rstest;

    const FINGERPRINT: &str = "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n";

    // The signing key labelled as
`cache.nixos.org-1`, + lazy_static! { + static ref PUB_CACHE_NIXOS_ORG_1: VerifyingKey = ed25519_dalek::VerifyingKey::from_bytes( + BASE64 + .decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=") + .unwrap()[..] + .try_into() + .unwrap() + ) + .unwrap(); + static ref PUB_TEST_1: VerifyingKey = ed25519_dalek::VerifyingKey::from_bytes( + BASE64 + .decode(b"tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=") + .unwrap()[..] + .try_into() + .unwrap() + ) + .unwrap(); + } + + #[rstest] + #[case::valid_cache_nixos_org_1(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true)] + #[case::valid_test1(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true)] + #[case::valid_cache_nixos_org_different_name(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-2:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true)] + #[case::fail_invalid_cache_nixos_org_1_signature(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb000000000000000000000000ytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, false)] + #[case::fail_valid_sig_but_wrong_fp_cache_nixos_org_1(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", &FINGERPRINT[0..5], false)] + fn verify_sigs( + #[case] verifying_key: &VerifyingKey, + #[case] sig_str: &'static str, + #[case] fp: &str, + #[case] expect_valid: bool, + ) { + let sig = Signature::parse(sig_str).expect("must parse"); + assert_eq!(expect_valid, sig.verify(fp.as_bytes(), verifying_key)); + } + + #[rstest] + #[case::wrong_length("cache.nixos.org-1:o1DTsjCz0PofLJ216P2RBuSulI8BAb6zHxWE4N+tzlcELk5Uk/GO2SCxWTRN5wJutLZZ+cHTMdWqOHF8")] + 
#[case::wrong_name_newline("test\n:u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw==")] + #[case::wrong_name_space("test :u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw==")] + #[case::empty_name( + ":u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw==" + )] + #[case::b64_only( + "u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw==" + )] + fn parse_fail(#[case] input: &'static str) { + Signature::parse(input).expect_err("must fail"); + } + + #[test] + fn serialize_deserialize() { + let signature_actual = Signature { + name: "cache.nixos.org-1", + bytes: hex!( + r#"4e c4 d3 6f 75 86 4d 92 a9 86 f6 1d 04 75 f0 a3 + ac 1e 54 82 e6 4f 2b 54 8c b0 7e bd c5 fc f5 f3 + a3 8d 18 9c 08 79 8a 03 84 42 3c c5 4b 92 3e 93 + 30 9e 06 31 7d c7 3d 55 91 74 3d 61 91 e2 99 05"# + ), + }; + let signature_str_json = "\"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==\""; + + let serialized = serde_json::to_string(&signature_actual).expect("must serialize"); + assert_eq!(signature_str_json, &serialized); + + let deserialized: Signature<'_> = + serde_json::from_str(signature_str_json).expect("must deserialize"); + assert_eq!(&signature_actual, &deserialized); + } +} diff --git a/tvix/nix-compat/src/nix_daemon/mod.rs b/tvix/nix-compat/src/nix_daemon/mod.rs new file mode 100644 index 0000000000..fe652377d1 --- /dev/null +++ b/tvix/nix-compat/src/nix_daemon/mod.rs @@ -0,0 +1,4 @@ +pub mod worker_protocol; + +mod protocol_version; +pub use protocol_version::ProtocolVersion; diff --git a/tvix/nix-compat/src/nix_daemon/protocol_version.rs b/tvix/nix-compat/src/nix_daemon/protocol_version.rs new file mode 100644 index 0000000000..8fd2b085c9 --- /dev/null +++ b/tvix/nix-compat/src/nix_daemon/protocol_version.rs @@ -0,0 +1,123 @@ +/// Protocol versions are represented as a u16. 
+/// The upper 8 bits are the major version, the lower bits the minor. +/// This is not aware of any endianness, use [crate::wire::read_u64] to get an +/// u64 first, and the try_from() impl from here if you're receiving over the +/// Nix Worker protocol. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub struct ProtocolVersion(u16); + +impl ProtocolVersion { + pub const fn from_parts(major: u8, minor: u8) -> Self { + Self(((major as u16) << 8) | minor as u16) + } + + pub fn major(&self) -> u8 { + ((self.0 & 0xff00) >> 8) as u8 + } + + pub fn minor(&self) -> u8 { + (self.0 & 0x00ff) as u8 + } +} + +impl PartialOrd for ProtocolVersion { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for ProtocolVersion { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + match self.major().cmp(&other.major()) { + std::cmp::Ordering::Less => std::cmp::Ordering::Less, + std::cmp::Ordering::Greater => std::cmp::Ordering::Greater, + std::cmp::Ordering::Equal => { + // same major, compare minor + self.minor().cmp(&other.minor()) + } + } + } +} + +impl From<u16> for ProtocolVersion { + fn from(value: u16) -> Self { + Self::from_parts(((value & 0xff00) >> 8) as u8, (value & 0x00ff) as u8) + } +} + +impl TryFrom<u64> for ProtocolVersion { + type Error = &'static str; + + fn try_from(value: u64) -> Result<Self, Self::Error> { + if value & !0xffff != 0 { + return Err("only two least significant bits might be populated"); + } + + Ok((value as u16).into()) + } +} + +impl From<ProtocolVersion> for u16 { + fn from(value: ProtocolVersion) -> Self { + value.0 + } +} + +impl From<ProtocolVersion> for u64 { + fn from(value: ProtocolVersion) -> Self { + value.0 as u64 + } +} + +impl std::fmt::Display for ProtocolVersion { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}.{}", self.major(), self.minor()) + } +} + +#[cfg(test)] +mod tests { + use super::ProtocolVersion; + + #[test] + fn 
from_parts() { + let version = ProtocolVersion::from_parts(1, 37); + assert_eq!(version.major(), 1, "correct major"); + assert_eq!(version.minor(), 37, "correct minor"); + assert_eq!("1.37", &version.to_string(), "to_string"); + + assert_eq!(0x0125, Into::<u16>::into(version)); + assert_eq!(0x0125, Into::<u64>::into(version)); + } + + #[test] + fn from_u16() { + let version = ProtocolVersion::from(0x0125_u16); + assert_eq!("1.37", &version.to_string()); + } + + #[test] + fn from_u64() { + let version = ProtocolVersion::try_from(0x0125_u64).expect("must succeed"); + assert_eq!("1.37", &version.to_string()); + } + + /// This contains data in higher bits, which should fail. + #[test] + fn from_u64_fail() { + ProtocolVersion::try_from(0xaa0125_u64).expect_err("must fail"); + } + + #[test] + fn ord() { + let v0_37 = ProtocolVersion::from_parts(0, 37); + let v1_37 = ProtocolVersion::from_parts(1, 37); + let v1_40 = ProtocolVersion::from_parts(1, 40); + + assert!(v0_37 < v1_37); + assert!(v1_37 > v0_37); + assert!(v1_37 < v1_40); + assert!(v1_40 > v1_37); + assert!(v1_40 <= v1_40); + } +} diff --git a/tvix/nix-compat/src/nix_daemon/worker_protocol.rs b/tvix/nix-compat/src/nix_daemon/worker_protocol.rs new file mode 100644 index 0000000000..7e3adc0db2 --- /dev/null +++ b/tvix/nix-compat/src/nix_daemon/worker_protocol.rs @@ -0,0 +1,434 @@ +use std::{ + collections::HashMap, + io::{Error, ErrorKind}, +}; + +use enum_primitive_derive::Primitive; +use num_traits::{FromPrimitive, ToPrimitive}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; + +use crate::wire; + +use super::ProtocolVersion; + +static WORKER_MAGIC_1: u64 = 0x6e697863; // "nixc" +static WORKER_MAGIC_2: u64 = 0x6478696f; // "dxio" +pub static STDERR_LAST: u64 = 0x616c7473; // "alts" + +/// | Nix version | Protocol | +/// |-----------------|----------| +/// | 0.11 | 1.02 | +/// | 0.12 | 1.04 | +/// | 0.13 | 1.05 | +/// | 0.14 | 1.05 | +/// | 0.15 | 1.05 | +/// | 0.16 | 1.06 | +/// | 1.0 | 1.10 | +/// | 1.1 | 1.11 | 
/// Max length of a Nix setting name/value, in bytes.
///
/// This value has been arbitrarily chosen after looking at the nix.conf
/// manpage. Don't hesitate to increase it if it's too limiting.
pub static MAX_SETTING_SIZE: usize = 1024;
Use WorkerProto::Op::AddToStore + BuildPaths = 9, + EnsurePath = 10, + AddTempRoot = 11, + AddIndirectRoot = 12, + SyncWithGC = 13, + FindRoots = 14, + ExportPath = 16, // obsolete + QueryDeriver = 18, // obsolete + SetOptions = 19, + CollectGarbage = 20, + QuerySubstitutablePathInfo = 21, + QueryDerivationOutputs = 22, // obsolete + QueryAllValidPaths = 23, + QueryFailedPaths = 24, + ClearFailedPaths = 25, + QueryPathInfo = 26, + ImportPaths = 27, // obsolete + QueryDerivationOutputNames = 28, // obsolete + QueryPathFromHashPart = 29, + QuerySubstitutablePathInfos = 30, + QueryValidPaths = 31, + QuerySubstitutablePaths = 32, + QueryValidDerivers = 33, + OptimiseStore = 34, + VerifyStore = 35, + BuildDerivation = 36, + AddSignatures = 37, + NarFromPath = 38, + AddToStoreNar = 39, + QueryMissing = 40, + QueryDerivationOutputMap = 41, + RegisterDrvOutput = 42, + QueryRealisation = 43, + AddMultipleToStore = 44, + AddBuildLog = 45, + BuildPathsWithResults = 46, + AddPermRoot = 47, +} + +/// Log verbosity. In the Nix wire protocol, the client requests a +/// verbosity level to the daemon, which in turns does not produce any +/// log below this verbosity. +#[derive(Debug, PartialEq, Primitive)] +pub enum Verbosity { + LvlError = 0, + LvlWarn = 1, + LvlNotice = 2, + LvlInfo = 3, + LvlTalkative = 4, + LvlChatty = 5, + LvlDebug = 6, + LvlVomit = 7, +} + +/// Settings requested by the client. These settings are applied to a +/// connection to between the daemon and a client. +#[derive(Debug, PartialEq)] +pub struct ClientSettings { + pub keep_failed: bool, + pub keep_going: bool, + pub try_fallback: bool, + pub verbosity: Verbosity, + pub max_build_jobs: u64, + pub max_silent_time: u64, + pub verbose_build: bool, + pub build_cores: u64, + pub use_substitutes: bool, + /// Key/Value dictionary in charge of overriding the settings set + /// by the Nix config file. 
+ /// + /// Some settings can be safely overidden, + /// some other require the user running the Nix client to be part + /// of the trusted users group. + pub overrides: HashMap<String, String>, +} + +/// Reads the client settings from the wire. +/// +/// Note: this function **only** reads the settings. It does not +/// manage the log state with the daemon. You'll have to do that on +/// your own. A minimal log implementation will consist in sending +/// back [STDERR_LAST] to the client after reading the client +/// settings. +/// +/// FUTUREWORK: write serialization. +pub async fn read_client_settings<R: AsyncReadExt + Unpin>( + r: &mut R, + client_version: ProtocolVersion, +) -> std::io::Result<ClientSettings> { + let keep_failed = r.read_u64_le().await? != 0; + let keep_going = r.read_u64_le().await? != 0; + let try_fallback = r.read_u64_le().await? != 0; + let verbosity_uint = r.read_u64_le().await?; + let verbosity = Verbosity::from_u64(verbosity_uint).ok_or_else(|| { + Error::new( + ErrorKind::InvalidData, + format!("Can't convert integer {} to verbosity", verbosity_uint), + ) + })?; + let max_build_jobs = r.read_u64_le().await?; + let max_silent_time = r.read_u64_le().await?; + _ = r.read_u64_le().await?; // obsolete useBuildHook + let verbose_build = r.read_u64_le().await? != 0; + _ = r.read_u64_le().await?; // obsolete logType + _ = r.read_u64_le().await?; // obsolete printBuildTrace + let build_cores = r.read_u64_le().await?; + let use_substitutes = r.read_u64_le().await? 
!= 0; + let mut overrides = HashMap::new(); + if client_version.minor() >= 12 { + let num_overrides = r.read_u64_le().await?; + for _ in 0..num_overrides { + let name = wire::read_string(r, 0..=MAX_SETTING_SIZE).await?; + let value = wire::read_string(r, 0..=MAX_SETTING_SIZE).await?; + overrides.insert(name, value); + } + } + Ok(ClientSettings { + keep_failed, + keep_going, + try_fallback, + verbosity, + max_build_jobs, + max_silent_time, + verbose_build, + build_cores, + use_substitutes, + overrides, + }) +} + +/// Performs the initial handshake the server is sending to a connecting client. +/// +/// During the handshake, the client first send a magic u64, to which +/// the daemon needs to respond with another magic u64. +/// Then, the daemon retrieves the client version, and discards a bunch of now +/// obsolete data. +/// +/// # Arguments +/// +/// * conn: connection with the Nix client. +/// * nix_version: semantic version of the Nix daemon. "2.18.2" for +/// instance. +/// * trusted: trust level of the Nix client. +/// +/// # Return +/// +/// The protocol version of the client. +pub async fn server_handshake_client<'a, RW: 'a>( + mut conn: &'a mut RW, + nix_version: &str, + trusted: Trust, +) -> std::io::Result<ProtocolVersion> +where + &'a mut RW: AsyncReadExt + AsyncWriteExt + Unpin, +{ + let worker_magic_1 = conn.read_u64_le().await?; + if worker_magic_1 != WORKER_MAGIC_1 { + Err(std::io::Error::new( + ErrorKind::InvalidData, + format!("Incorrect worker magic number received: {}", worker_magic_1), + )) + } else { + conn.write_u64_le(WORKER_MAGIC_2).await?; + conn.write_u64_le(PROTOCOL_VERSION.into()).await?; + conn.flush().await?; + let client_version = conn.read_u64_le().await?; + // Parse into ProtocolVersion. 
+ let client_version: ProtocolVersion = client_version + .try_into() + .map_err(|e| Error::new(ErrorKind::Unsupported, e))?; + if client_version < ProtocolVersion::from_parts(1, 10) { + return Err(Error::new( + ErrorKind::Unsupported, + format!("The nix client version {} is too old", client_version), + )); + } + if client_version.minor() >= 14 { + // Obsolete CPU affinity. + let read_affinity = conn.read_u64_le().await?; + if read_affinity != 0 { + let _cpu_affinity = conn.read_u64_le().await?; + }; + } + if client_version.minor() >= 11 { + // Obsolete reserveSpace + let _reserve_space = conn.read_u64_le().await?; + } + if client_version.minor() >= 33 { + // Nix version. We're plain lying, we're not Nix, but eh… + // Setting it to the 2.3 lineage. Not 100% sure this is a + // good idea. + wire::write_bytes(&mut conn, nix_version).await?; + conn.flush().await?; + } + if client_version.minor() >= 35 { + write_worker_trust_level(&mut conn, trusted).await?; + } + Ok(client_version) + } +} + +/// Read a worker [Operation] from the wire. +pub async fn read_op<R: AsyncReadExt + Unpin>(r: &mut R) -> std::io::Result<Operation> { + let op_number = r.read_u64_le().await?; + Operation::from_u64(op_number).ok_or(Error::new( + ErrorKind::InvalidData, + format!("Invalid OP number {}", op_number), + )) +} + +/// Write a worker [Operation] to the wire. +pub async fn write_op<W: AsyncWriteExt + Unpin>(w: &mut W, op: &Operation) -> std::io::Result<()> { + let op = Operation::to_u64(op).ok_or(Error::new( + ErrorKind::Other, + format!("Can't convert the OP {:?} to u64", op), + ))?; + w.write_u64(op).await +} + +#[derive(Debug, PartialEq)] +pub enum Trust { + Trusted, + NotTrusted, +} + +/// Write the worker [Trust] level to the wire. +/// +/// Cpp Nix has a legacy third option: u8 0. This option is meant to +/// be used as a backward compatible measure. Since we're not +/// targetting protocol versions pre-dating the trust notion, we +/// decided not to implement it here. 
+pub async fn write_worker_trust_level<W>(conn: &mut W, t: Trust) -> std::io::Result<()> +where + W: AsyncReadExt + AsyncWriteExt + Unpin, +{ + match t { + Trust::Trusted => conn.write_u64_le(1).await, + Trust::NotTrusted => conn.write_u64_le(2).await, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use hex_literal::hex; + use tokio_test::io::Builder; + + #[tokio::test] + async fn test_init_hanshake() { + let mut test_conn = tokio_test::io::Builder::new() + .read(&WORKER_MAGIC_1.to_le_bytes()) + .write(&WORKER_MAGIC_2.to_le_bytes()) + .write(&[37, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + // Let's say the client is in sync with the daemon + // protocol-wise + .read(&[37, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + // cpu affinity + .read(&[0; 8]) + // reservespace + .read(&[0; 8]) + // version (size) + .write(&[0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) + // version (data == 2.18.2 + padding) + .write(&[50, 46, 49, 56, 46, 50, 0, 0]) + // Trusted (1 == client trusted + .write(&[1, 0, 0, 0, 0, 0, 0, 0]) + .build(); + let client_version = server_handshake_client(&mut test_conn, "2.18.2", Trust::Trusted) + .await + .unwrap(); + + assert_eq!(client_version, PROTOCOL_VERSION) + } + + #[tokio::test] + async fn test_read_client_settings_without_overrides() { + // Client settings bits captured from a Nix 2.3.17 run w/ sockdump (protocol version 21). 
/// Parses a captured settings payload that carries two overrides and checks
/// every decoded field, including the override map.
#[tokio::test]
async fn test_read_client_settings_with_overrides() {
    // Client settings bits captured from a Nix 2.3.17 run w/ sockdump (protocol version 21).
    //
    // Layout of the fixture: twelve 8-byte little-endian words for the fixed
    // settings, then the override count (2), then for each override a name
    // and a value, each encoded as an 8-byte length followed by the bytes,
    // zero-padded to a multiple of 8.
    let wire_bits = hex!(
        "00 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         02 00 00 00 00 00 00 00 \
         10 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         01 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         00 00 00 00 00 00 00 00 \
         01 00 00 00 00 00 00 00 \
         02 00 00 00 00 00 00 00 \
         0c 00 00 00 00 00 00 00 \
         61 6c 6c 6f 77 65 64 2d \
         75 72 69 73 00 00 00 00 \
         1e 00 00 00 00 00 00 00 \
         68 74 74 70 73 3a 2f 2f \
         62 6f 72 64 65 61 75 78 \
         2e 67 75 69 78 2e 67 6e \
         75 2e 6f 72 67 2f 00 00 \
         0d 00 00 00 00 00 00 00 \
         61 6c 6c 6f 77 65 64 2d \
         75 73 65 72 73 00 00 00 \
         0b 00 00 00 00 00 00 00 \
         6a 65 61 6e 20 70 69 65 \
         72 72 65 00 00 00 00 00"
    );
    let mut mock = Builder::new().read(&wire_bits).build();
    let settings = read_client_settings(&mut mock, ProtocolVersion::from_parts(1, 21))
        .await
        .expect("should parse");
    let overrides = HashMap::from([
        (
            String::from("allowed-uris"),
            String::from("https://bordeaux.guix.gnu.org/"),
        ),
        (String::from("allowed-users"), String::from("jean pierre")),
    ]);
    let expected = ClientSettings {
        keep_failed: false,
        keep_going: false,
        try_fallback: false,
        verbosity: Verbosity::LvlNotice,
        max_build_jobs: 16,
        max_silent_time: 0,
        verbose_build: false,
        build_cores: 0,
        use_substitutes: true,
        overrides,
    };
    assert_eq!(settings, expected);
}
This is also the main reason why we can't use `data_encoding::Encoding` - +//! it gets things wrong if there normally would be a need for padding. + +use std::fmt::Write; + +use data_encoding::{DecodeError, DecodeKind}; + +const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz"; + +/// Returns encoded input +pub fn encode(input: &[u8]) -> String { + let output_len = encode_len(input.len()); + let mut output = String::with_capacity(output_len); + + for n in (0..output_len).rev() { + let b = n * 5; // bit offset within the entire input + let i = b / 8; // input byte index + let j = b % 8; // bit offset within that input byte + + // 5-bit words aren't aligned to bytes + // we can only read byte-aligned units + // read 16 bits then shift and mask to 5 + let c = { + let mut word = input[i] as u16; + if let Some(&msb) = input.get(i + 1) { + word |= (msb as u16) << 8; + } + (word >> j) & 0x1f + }; + + output.write_char(ALPHABET[c as usize] as char).unwrap(); + } + + output +} + +/// This maps a nixbase32-encoded character to its binary representation, which +/// is also the index of the character in the alphabet. Invalid characters are +/// mapped to 0xFF, which is itself an invalid value. +const BASE32_ORD: [u8; 256] = { + let mut ord = [0xFF; 256]; + let mut alphabet = ALPHABET.as_slice(); + let mut i = 0; + + while let &[c, ref tail @ ..] 
= alphabet { + ord[c as usize] = i; + alphabet = tail; + i += 1; + } + + ord +}; + +/// Returns decoded input +pub fn decode(input: impl AsRef<[u8]>) -> Result<Vec<u8>, DecodeError> { + let input = input.as_ref(); + + let output_len = decode_len(input.len()); + let mut output: Vec<u8> = vec![0x00; output_len]; + + decode_inner(input, &mut output)?; + Ok(output) +} + +pub fn decode_fixed<const K: usize>(input: impl AsRef<[u8]>) -> Result<[u8; K], DecodeError> { + let input = input.as_ref(); + + if input.len() != encode_len(K) { + return Err(DecodeError { + position: input.len().min(encode_len(K)), + kind: DecodeKind::Length, + }); + } + + let mut output = [0; K]; + decode_inner(input, &mut output)?; + Ok(output) +} + +fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), DecodeError> { + // loop over all characters in reverse, and keep the iteration count in n. + let mut carry = 0; + let mut mask = 0; + for (n, &c) in input.iter().rev().enumerate() { + let b = n * 5; + let i = b / 8; + let j = b % 8; + + let digit = BASE32_ORD[c as usize]; + let value = (digit as u16) << j; + output[i] |= value as u8 | carry; + carry = (value >> 8) as u8; + + mask |= digit; + } + + if mask == 0xFF { + return Err(DecodeError { + position: find_invalid(input), + kind: DecodeKind::Symbol, + }); + } + + // if we're at the end, but have a nonzero carry, the encoding is invalid. + if carry != 0 { + return Err(DecodeError { + position: 0, + kind: DecodeKind::Trailing, + }); + } + + Ok(()) +} + +fn find_invalid(input: &[u8]) -> usize { + for (i, &c) in input.iter().enumerate() { + if !ALPHABET.contains(&c) { + return i; + } + } + + unreachable!() +} + +/// Returns the decoded length of an input of length len. 
/// Returns the encoded length of an input of length len
///
/// Each output character carries 5 bits, so this is the number of input
/// bits divided by 5, rounded up.
pub const fn encode_len(len: usize) -> usize {
    let bits = len * 8;
    (bits + 4) / 5
}
fn encode_len() { + assert_eq!(super::encode_len(0), 0); + assert_eq!(super::encode_len(20), 32); + } + + #[test] + fn decode_len() { + assert_eq!(super::decode_len(0), 0); + assert_eq!(super::decode_len(32), 20); + } +} diff --git a/tvix/nix-compat/src/nixhash/algos.rs b/tvix/nix-compat/src/nixhash/algos.rs new file mode 100644 index 0000000000..ac8915314c --- /dev/null +++ b/tvix/nix-compat/src/nixhash/algos.rs @@ -0,0 +1,75 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::nixhash::Error; + +/// This are the hash algorithms supported by cppnix. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum HashAlgo { + Md5, + Sha1, + Sha256, + Sha512, +} + +impl HashAlgo { + // return the number of bytes in the digest of the given hash algo. + pub fn digest_length(&self) -> usize { + match self { + HashAlgo::Sha1 => 20, + HashAlgo::Sha256 => 32, + HashAlgo::Sha512 => 64, + HashAlgo::Md5 => 16, + } + } +} + +impl Display for HashAlgo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + HashAlgo::Md5 => write!(f, "md5"), + HashAlgo::Sha1 => write!(f, "sha1"), + HashAlgo::Sha256 => write!(f, "sha256"), + HashAlgo::Sha512 => write!(f, "sha512"), + } + } +} + +impl Serialize for HashAlgo { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.collect_str(&self) + } +} + +impl<'de> Deserialize<'de> for HashAlgo { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let s: &str = Deserialize::deserialize(deserializer)?; + HashAlgo::try_from(s).map_err(serde::de::Error::custom) + } +} + +/// TODO(Raito): this could be automated via macros, I suppose. +/// But this may be more expensive than just doing it by hand +/// and ensuring that is kept in sync. 
+pub const SUPPORTED_ALGOS: [&str; 4] = ["md5", "sha1", "sha256", "sha512"]; + +impl TryFrom<&str> for HashAlgo { + type Error = Error; + + fn try_from(algo_str: &str) -> Result<Self, Self::Error> { + match algo_str { + "md5" => Ok(Self::Md5), + "sha1" => Ok(Self::Sha1), + "sha256" => Ok(Self::Sha256), + "sha512" => Ok(Self::Sha512), + _ => Err(Error::InvalidAlgo(algo_str.to_string())), + } + } +} diff --git a/tvix/nix-compat/src/nixhash/ca_hash.rs b/tvix/nix-compat/src/nixhash/ca_hash.rs new file mode 100644 index 0000000000..2bf5f966ce --- /dev/null +++ b/tvix/nix-compat/src/nixhash/ca_hash.rs @@ -0,0 +1,343 @@ +use crate::nixbase32; +use crate::nixhash::{HashAlgo, NixHash}; +use serde::de::Unexpected; +use serde::ser::SerializeMap; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde_json::{Map, Value}; +use std::borrow::Cow; + +use super::algos::SUPPORTED_ALGOS; +use super::decode_digest; + +/// A Nix CAHash describes a content-addressed hash of a path. +/// +/// The way Nix prints it as a string is a bit confusing, but there's essentially +/// three modes, `Flat`, `Nar` and `Text`. +/// `Flat` and `Nar` support all 4 algos that [NixHash] supports +/// (sha1, md5, sha256, sha512), `Text` only supports sha256. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum CAHash { + Flat(NixHash), // "fixed flat" + Nar(NixHash), // "fixed recursive" + Text([u8; 32]), // "text", only supports sha256 +} + +/// Representation for the supported hash modes. 
/// This takes a serde_json::Map and turns it into this structure. It is necessary to do such
/// shenanigans because we have external consumers, like the Derivation parser, who would like to
/// know whether we have an invalid or a missing NixHashWithMode structure in another structure,
/// e.g. Output.
/// This means we have this combinatorial situation:
/// - no hash, no hashAlgo: no [CAHash] so we return Ok(None).
/// - present hash, missing hashAlgo: invalid, we will return missing_field
/// - missing hash, present hashAlgo: same
/// - present hash, present hashAlgo: either we return ourselves or a type/value validation
/// error.
/// This function is for internal consumption regarding those needs until we have a better
/// solution. Now that this is said, let's explain how this works.
///
/// We want to map the serde data model into a [CAHash].
///
/// The serde data model has a `hash` field (containing the digest),
/// and a `hashAlgo` field, containing the stringified hash algo.
/// In case the hash is recursive, hashAlgo also has a `r:` prefix.
///
/// This is to match how `nix show-derivation` command shows them in JSON
/// representation.
///
/// Only the `Flat` and `Nar` variants are ever produced here: a `r:` prefix
/// selects `Nar`, its absence selects `Flat`.
pub(crate) fn from_map<'de, D>(map: &Map<String, Value>) -> Result<Option<Self>, D::Error>
where
    D: Deserializer<'de>,
{
    // If we have neither hash nor hashAlgo, let's just return None.
    if !map.contains_key("hash") && !map.contains_key("hashAlgo") {
        return Ok(None);
    }

    // From here on at least one of the two keys is present, so a missing
    // counterpart is an error rather than "no hash".
    let hash_algo_v = map.get("hashAlgo").ok_or_else(|| {
        serde::de::Error::missing_field(
            "couldn't extract `hashAlgo` key, but `hash` key present",
        )
    })?;
    let hash_algo = hash_algo_v.as_str().ok_or_else(|| {
        serde::de::Error::invalid_type(Unexpected::Other(&hash_algo_v.to_string()), &"a string")
    })?;
    // Split off the optional `r:` marker; what remains is the algo name.
    let (mode_is_nar, hash_algo) = if let Some(s) = hash_algo.strip_prefix("r:") {
        (true, s)
    } else {
        (false, hash_algo)
    };
    let hash_algo = HashAlgo::try_from(hash_algo).map_err(|e| {
        serde::de::Error::invalid_value(
            Unexpected::Other(&e.to_string()),
            &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(),
        )
    })?;

    let hash_v = map.get("hash").ok_or_else(|| {
        serde::de::Error::missing_field(
            "couldn't extract `hash` key but `hashAlgo` key present",
        )
    })?;
    let hash = hash_v.as_str().ok_or_else(|| {
        serde::de::Error::invalid_type(Unexpected::Other(&hash_v.to_string()), &"a string")
    })?;
    // Decoding is delegated to `decode_digest`, which is given the expected algo.
    let hash = decode_digest(hash.as_bytes(), hash_algo)
        .map_err(|e| serde::de::Error::custom(e.to_string()))?;
    if mode_is_nar {
        Ok(Some(Self::Nar(hash)))
    } else {
        Ok(Some(Self::Flat(hash)))
    }
}
+ CAHash::Text(h) => { + map.serialize_entry("hash", &nixbase32::encode(h.as_ref()))?; + map.serialize_entry("hashAlgo", "text")?; + } + }; + map.end() + } +} + +impl<'de> Deserialize<'de> for CAHash { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?; + + match value { + None => Err(serde::de::Error::custom("couldn't parse as map")), + Some(v) => Ok(v), + } + } +} + +#[cfg(test)] +mod tests { + use crate::{derivation::CAHash, nixhash}; + + #[test] + fn serialize_flat() { + let json_bytes = r#"{ + "hash": "1fnf2m46ya7r7afkcb8ba2j0sc4a85m749sh9jz64g4hx6z3r088", + "hashAlgo": "sha256" +}"#; + let hash = CAHash::Flat( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + ) + .unwrap(), + ); + let serialized = serde_json::to_string_pretty(&hash).unwrap(); + assert_eq!(serialized, json_bytes); + } + + #[test] + fn serialize_nar() { + let json_bytes = r#"{ + "hash": "1fnf2m46ya7r7afkcb8ba2j0sc4a85m749sh9jz64g4hx6z3r088", + "hashAlgo": "r:sha256" +}"#; + let hash = CAHash::Nar( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + ) + .unwrap(), + ); + let serialized = serde_json::to_string_pretty(&hash).unwrap(); + assert_eq!(serialized, json_bytes); + } + + #[test] + fn deserialize_flat() { + let json_bytes = r#" + { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "sha256" + }"#; + let hash: CAHash = serde_json::from_str(json_bytes).expect("must parse"); + + assert_eq!( + hash, + CAHash::Flat( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + ) + .unwrap() + ) + ); + } + + #[test] + fn deserialize_hex() { + let json_bytes = r#" + { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let hash: CAHash = 
serde_json::from_str(json_bytes).expect("must parse"); + + assert_eq!( + hash, + CAHash::Nar( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + ) + .unwrap() + ) + ); + } + + #[test] + fn deserialize_nixbase32() { + let json_bytes = r#" + { + "hash": "1fnf2m46ya7r7afkcb8ba2j0sc4a85m749sh9jz64g4hx6z3r088", + "hashAlgo": "r:sha256" + }"#; + let hash: CAHash = serde_json::from_str(json_bytes).expect("must parse"); + + assert_eq!( + hash, + CAHash::Nar( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + ) + .unwrap() + ) + ); + } + + #[test] + fn deserialize_base64() { + let json_bytes = r#" + { + "hash": "CIE8vumQPGK+TFAncmpBijANpFALLTadOvkob0gVzro=", + "hashAlgo": "r:sha256" + }"#; + let hash: CAHash = serde_json::from_str(json_bytes).expect("must parse"); + + assert_eq!( + hash, + CAHash::Nar( + nixhash::from_nix_str( + "sha256:08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + ) + .unwrap() + ) + ); + } + + #[test] + fn serialize_deserialize_nar() { + let json_bytes = r#" + { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let hash: CAHash = serde_json::from_str(json_bytes).expect("must parse"); + + let serialized = serde_json::to_string(&hash).expect("Serialize"); + let hash2: CAHash = serde_json::from_str(&serialized).expect("must parse again"); + + assert_eq!(hash, hash2); + } + + #[test] + fn serialize_deserialize_flat() { + let json_bytes = r#" + { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "sha256" + }"#; + let hash: CAHash = serde_json::from_str(json_bytes).expect("must parse"); + + let serialized = serde_json::to_string(&hash).expect("Serialize"); + let hash2: CAHash = serde_json::from_str(&serialized).expect("must parse again"); + + assert_eq!(hash, hash2); + } +} diff --git a/tvix/nix-compat/src/nixhash/mod.rs 
b/tvix/nix-compat/src/nixhash/mod.rs new file mode 100644 index 0000000000..d86cb8b79f --- /dev/null +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -0,0 +1,602 @@ +use crate::nixbase32; +use bstr::ByteSlice; +use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; +use serde::Deserialize; +use serde::Serialize; +use std::cmp::Ordering; +use std::fmt::Display; +use thiserror; + +mod algos; +mod ca_hash; + +pub use algos::HashAlgo; +pub use ca_hash::CAHash; +pub use ca_hash::HashMode as CAHashMode; + +/// NixHash represents hashes known by Nix. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum NixHash { + Md5([u8; 16]), + Sha1([u8; 20]), + Sha256([u8; 32]), + Sha512(Box<[u8; 64]>), +} + +/// Same order as sorting the corresponding nixbase32 strings. +/// +/// This order is used in the ATerm serialization of a derivation +/// and thus affects the calculated output hash. +impl Ord for NixHash { + fn cmp(&self, other: &NixHash) -> Ordering { + self.digest_as_bytes().cmp(other.digest_as_bytes()) + } +} + +// See Ord for reason to implement this manually. +impl PartialOrd for NixHash { + fn partial_cmp(&self, other: &NixHash) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +impl Display for NixHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + write!( + f, + "{}-{}", + self.algo(), + nixbase32::encode(self.digest_as_bytes()) + ) + } +} + +/// convenience Result type for all nixhash parsing Results. +pub type NixHashResult<V> = std::result::Result<V, Error>; + +impl NixHash { + /// returns the algo as [HashAlgo]. + pub fn algo(&self) -> HashAlgo { + match self { + NixHash::Md5(_) => HashAlgo::Md5, + NixHash::Sha1(_) => HashAlgo::Sha1, + NixHash::Sha256(_) => HashAlgo::Sha256, + NixHash::Sha512(_) => HashAlgo::Sha512, + } + } + + /// returns the digest as variable-length byte slice. 
+ pub fn digest_as_bytes(&self) -> &[u8] { + match self { + NixHash::Md5(digest) => digest, + NixHash::Sha1(digest) => digest, + NixHash::Sha256(digest) => digest, + NixHash::Sha512(digest) => digest.as_ref(), + } + } + + /// Constructs a [NixHash] from the Nix default hash format, + /// the inverse of [Self::to_nix_hex_string]. + pub fn from_nix_hex_str(s: &str) -> Option<Self> { + let (tag, digest) = s.split_once(':')?; + + (match tag { + "md5" => nixbase32::decode_fixed(digest).map(NixHash::Md5), + "sha1" => nixbase32::decode_fixed(digest).map(NixHash::Sha1), + "sha256" => nixbase32::decode_fixed(digest).map(NixHash::Sha256), + "sha512" => nixbase32::decode_fixed(digest) + .map(Box::new) + .map(NixHash::Sha512), + _ => return None, + }) + .ok() + } + + /// Formats a [NixHash] in the Nix default hash format, + /// which is the algo, followed by a colon, then the lower hex encoded digest. + pub fn to_nix_hex_string(&self) -> String { + format!("{}:{}", self.algo(), self.to_plain_hex_string()) + } + + /// Formats a [NixHash] in the format that's used inside CAHash, + /// which is the algo, followed by a colon, then the nixbase32-encoded digest. + pub(crate) fn to_nix_nixbase32_string(&self) -> String { + format!( + "{}:{}", + self.algo(), + nixbase32::encode(self.digest_as_bytes()) + ) + } + + /// Returns the digest as a hex string -- without any algorithm prefix. + pub fn to_plain_hex_string(&self) -> String { + HEXLOWER.encode(self.digest_as_bytes()) + } +} + +impl TryFrom<(HashAlgo, &[u8])> for NixHash { + type Error = Error; + + /// Constructs a new [NixHash] by specifying [HashAlgo] and digest. + /// It can fail if the passed digest length doesn't match what's expected for + /// the passed algo. 
+ fn try_from(value: (HashAlgo, &[u8])) -> NixHashResult<Self> { + let (algo, digest) = value; + from_algo_and_digest(algo, digest) + } +} + +impl<'de> Deserialize<'de> for NixHash { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let str: &'de str = Deserialize::deserialize(deserializer)?; + from_str(str, None).map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(str), &"NixHash") + }) + } +} + +impl Serialize for NixHash { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + // encode as SRI + let string = format!("{}-{}", self.algo(), BASE64.encode(self.digest_as_bytes())); + string.serialize(serializer) + } +} + +/// Constructs a new [NixHash] by specifying [HashAlgo] and digest. +/// It can fail if the passed digest length doesn't match what's expected for +/// the passed algo. +pub fn from_algo_and_digest(algo: HashAlgo, digest: &[u8]) -> NixHashResult<NixHash> { + if digest.len() != algo.digest_length() { + return Err(Error::InvalidEncodedDigestLength(digest.len(), algo)); + } + + Ok(match algo { + HashAlgo::Md5 => NixHash::Md5(digest.try_into().unwrap()), + HashAlgo::Sha1 => NixHash::Sha1(digest.try_into().unwrap()), + HashAlgo::Sha256 => NixHash::Sha256(digest.try_into().unwrap()), + HashAlgo::Sha512 => NixHash::Sha512(Box::new(digest.try_into().unwrap())), + }) +} + +/// Errors related to NixHash construction. 
/// Errors related to NixHash construction.
#[derive(Debug, Eq, PartialEq, thiserror::Error)]
pub enum Error {
    /// The hash algo could not be recognized. When produced while parsing a
    /// Nix hash string, the payload is the whole input string.
    #[error("invalid hash algo: {0}")]
    InvalidAlgo(String),
    /// The string is not a valid SRI string (it contains no `-` separator).
    /// The payload is the whole input string.
    #[error("invalid SRI string: {0}")]
    InvalidSRI(String),
    /// A digest has an unexpected length for the given algo.
    /// Depending on where the error is raised, the length is either that of
    /// the still-encoded digest string or that of the decoded digest bytes.
    #[error("invalid encoded digest length '{0}' for algo {1}")]
    InvalidEncodedDigestLength(usize, HashAlgo),
    /// The digest had the length of a base16 (lower hex) encoding, but failed to decode.
    #[error("invalid base16 encoding: {0}")]
    InvalidBase16Encoding(data_encoding::DecodeError),
    /// The digest had the length of a nixbase32 encoding, but failed to decode.
    #[error("invalid base32 encoding: {0}")]
    InvalidBase32Encoding(data_encoding::DecodeError),
    /// The digest was expected to be base64, but failed to decode.
    #[error("invalid base64 encoding: {0}")]
    InvalidBase64Encoding(data_encoding::DecodeError),
    /// The algo passed out-of-band (first field) disagrees with the algo
    /// found inline in the hash string (second field).
    #[error("conflicting hash algo: {0} (hash_algo) vs {1} (inline)")]
    ConflictingHashAlgos(HashAlgo, HashAlgo),
    /// A bare digest was passed without any out-of-band algo, so there is no
    /// way to know the algo. The payload is the whole input string.
    #[error("missing inline hash algo, but no externally-specified algo: {0}")]
    MissingInlineHashAlgo(String),
}
+ if s.starts_with("sha1-") + || s.starts_with("sha256-") + || s.starts_with("sha512-") + || s.starts_with("md5-") + { + let parsed_nixhash = from_sri_str(s)?; + + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo() { + return Err(Error::ConflictingHashAlgos(algo, parsed_nixhash.algo())); + } + } + return Ok(parsed_nixhash); + } + + // Peek at the beginning again to see if it's a Nix Hash + if s.starts_with("sha1:") + || s.starts_with("sha256:") + || s.starts_with("sha512:") + || s.starts_with("md5:") + { + let parsed_nixhash = from_nix_str(s)?; + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo() { + return Err(Error::ConflictingHashAlgos(algo, parsed_nixhash.algo())); + } + } + return Ok(parsed_nixhash); + } + + // Neither of these, assume a bare digest, so there MUST be an externally-passed algo. + match algo { + // Fail if there isn't. + None => Err(Error::MissingInlineHashAlgo(s.to_string())), + Some(algo) => decode_digest(s.as_bytes(), algo), + } +} + +/// Parses a Nix hash string ($algo:$digest) to a NixHash. +pub fn from_nix_str(s: &str) -> NixHashResult<NixHash> { + if let Some(rest) = s.strip_prefix("sha1:") { + decode_digest(rest.as_bytes(), HashAlgo::Sha1) + } else if let Some(rest) = s.strip_prefix("sha256:") { + decode_digest(rest.as_bytes(), HashAlgo::Sha256) + } else if let Some(rest) = s.strip_prefix("sha512:") { + decode_digest(rest.as_bytes(), HashAlgo::Sha512) + } else if let Some(rest) = s.strip_prefix("md5:") { + decode_digest(rest.as_bytes(), HashAlgo::Md5) + } else { + Err(Error::InvalidAlgo(s.to_string())) + } +} + +/// Parses a Nix SRI string to a NixHash. +/// Contrary to the SRI spec, Nix doesn't have an understanding of passing +/// multiple hashes (with different algos) in SRI hashes. 
+/// It instead simply cuts everything off after the expected length for the +/// specified algo, and tries to parse the rest in permissive base64 (allowing +/// missing padding). +pub fn from_sri_str(s: &str) -> NixHashResult<NixHash> { + // split at the first occurence of "-" + let (algo_str, digest_str) = s + .split_once('-') + .ok_or_else(|| Error::InvalidSRI(s.to_string()))?; + + // try to map the part before that `-` to a supported hash algo: + let algo: HashAlgo = algo_str.try_into()?; + + // For the digest string, Nix ignores everything after the expected BASE64 + // (with padding) length, to account for the fact SRI allows specifying more + // than one checksum, so shorten it. + let digest_str = { + let encoded_max_len = BASE64.encode_len(algo.digest_length()); + if digest_str.len() > encoded_max_len { + digest_str[..encoded_max_len].as_bytes() + } else { + digest_str.as_bytes() + } + }; + + // if the digest string is too small to fit even the BASE64_NOPAD version, bail out. + if digest_str.len() < BASE64_NOPAD.encode_len(algo.digest_length()) { + return Err(Error::InvalidEncodedDigestLength(digest_str.len(), algo)); + } + + // trim potential padding, and use a version that does not do trailing bit + // checking. + let mut spec = BASE64_NOPAD.specification(); + spec.check_trailing_bits = false; + let encoding = spec + .encoding() + .expect("Tvix bug: failed to get the special base64 encoder for Nix SRI hashes"); + + let digest = encoding + .decode(digest_str.trim_end_with(|c| c == '=')) + .map_err(Error::InvalidBase64Encoding)?; + + from_algo_and_digest(algo, &digest) +} + +/// Decode a plain digest depending on the hash algo specified externally. +/// hexlower, nixbase32 and base64 encodings are supported - the encoding is +/// inferred from the input length. 
+fn decode_digest(s: &[u8], algo: HashAlgo) -> NixHashResult<NixHash> { + // for the chosen hash algo, calculate the expected (decoded) digest length + // (as bytes) + let digest = if s.len() == HEXLOWER.encode_len(algo.digest_length()) { + HEXLOWER + .decode(s.as_ref()) + .map_err(Error::InvalidBase16Encoding)? + } else if s.len() == nixbase32::encode_len(algo.digest_length()) { + nixbase32::decode(s).map_err(Error::InvalidBase32Encoding)? + } else if s.len() == BASE64.encode_len(algo.digest_length()) { + BASE64 + .decode(s.as_ref()) + .map_err(Error::InvalidBase64Encoding)? + } else { + Err(Error::InvalidEncodedDigestLength(s.len(), algo))? + }; + + Ok(from_algo_and_digest(algo, &digest).unwrap()) +} + +#[cfg(test)] +mod tests { + use crate::{ + nixbase32, + nixhash::{self, HashAlgo, NixHash}, + }; + use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; + use hex_literal::hex; + use rstest::rstest; + + const DIGEST_SHA1: [u8; 20] = hex!("6016777997c30ab02413cf5095622cd7924283ac"); + const DIGEST_SHA256: [u8; 32] = + hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"); + const DIGEST_SHA512: [u8; 64] = hex!("ab40d0be3541f0774bba7815d13d10b03252e96e95f7dbb4ee99a3b431c21662fd6971a020160e39848aa5f305b9be0f78727b2b0789e39f124d21e92b8f39ef"); + const DIGEST_MD5: [u8; 16] = hex!("c4874a8897440b393d862d8fd459073f"); + + fn to_base16(digest: &[u8]) -> String { + HEXLOWER.encode(digest) + } + + fn to_nixbase32(digest: &[u8]) -> String { + nixbase32::encode(digest) + } + + fn to_base64(digest: &[u8]) -> String { + BASE64.encode(digest) + } + + fn to_base64_nopad(digest: &[u8]) -> String { + BASE64_NOPAD.encode(digest) + } + + // TODO + fn make_nixhash(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}:{}", algo, digest_encoded) + } + fn make_sri_string(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}-{}", algo, digest_encoded) + } + + /// Test parsing a hash string in various formats, and also when/how the out-of-band 
algo is needed. + #[rstest] + #[case::sha1(&NixHash::Sha1(DIGEST_SHA1))] + #[case::sha256(&NixHash::Sha256(DIGEST_SHA256))] + #[case::sha512(&NixHash::Sha512(Box::new(DIGEST_SHA512)))] + #[case::md5(&NixHash::Md5(DIGEST_MD5))] + fn from_str(#[case] expected_hash: &NixHash) { + let algo = &expected_hash.algo(); + let digest = expected_hash.digest_as_bytes(); + // parse SRI + { + // base64 without out-of-band algo + let s = make_sri_string(algo, to_base64(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64 with out-of-band-algo + let s = make_sri_string(algo, to_base64(digest)); + let h = nixhash::from_str(&s, Some(&expected_hash.algo().to_string())) + .expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64_nopad without out-of-band algo + let s = make_sri_string(algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64_nopad with out-of-band-algo + let s = make_sri_string(algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain base16. should succeed with algo out-of-band, but fail without. + { + let s = to_base16(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain nixbase32. should succeed with algo out-of-band, but fail without. + { + let s = to_nixbase32(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain base64. should succeed with algo out-of-band, but fail without. 
+ { + let s = to_base64(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse Nix hash strings + { + // base16. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(algo, to_base16(digest)); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // nixbase32. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(algo, to_nixbase32(digest)); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // base64. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(algo, to_base64(digest)); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + } + } + + /// Test parsing an SRI hash via the [nixhash::from_sri_str] method. + #[test] + fn from_sri_str() { + let nix_hash = nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=") + .expect("must succeed"); + + assert_eq!(HashAlgo::Sha256, nix_hash.algo()); + assert_eq!( + &hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"), + nix_hash.digest_as_bytes() + ) + } + + /// Test parsing sha512 SRI hash with various paddings, Nix accepts all of them. 
+ #[rstest] + #[case::no_padding("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ")] + #[case::too_little_padding("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ=")] + #[case::correct_padding("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ==")] + #[case::too_much_padding("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ===")] + #[case::additional_suffix_ignored("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ== cheesecake")] + fn from_sri_str_sha512_paddings(#[case] sri_str: &str) { + let nix_hash = nixhash::from_sri_str(sri_str).expect("must succeed"); + + assert_eq!(HashAlgo::Sha512, nix_hash.algo()); + assert_eq!( + &hex!("ee0f754c1bd8a18428ad14eaa3ead80ff8b96275af5012e7a8384f1f10490da056eec9ae3cc791a7a13a24e16e54df5bccdd109c7d53a14534bbd7360a300b11"), + nix_hash.digest_as_bytes() + ) + } + + /// Ensure we detect truncated base64 digests, where the digest size + /// doesn't match what's expected from that hash function. + #[test] + fn from_sri_str_truncated() { + nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank") + .expect_err("must fail"); + } + + /// Ensure we fail on SRI hashes that Nix doesn't support. + #[test] + fn from_sri_str_unsupported() { + nixhash::from_sri_str( + "sha384-o4UVSl89mIB0sFUK+3jQbG+C9Zc9dRlV/Xd3KAvXEbhqxu0J5OAdg6b6VHKHwQ7U", + ) + .expect_err("must fail"); + } + + /// Ensure we reject invalid base64 encoding + #[test] + fn from_sri_str_invalid_base64() { + nixhash::from_sri_str("sha256-invalid=base64").expect_err("must fail"); + } + + /// Nix also accepts SRI strings with missing padding, but only in case the + /// string is expressed as SRI, so it still needs to have a `sha256-` prefix. 
+ /// + /// This both seems to work if it is passed with and without specifying the + /// hash algo out-of-band (hash = "sha256-…" or sha256 = "sha256-…") + /// + /// Passing the same broken base64 string, but not as SRI, while passing + /// the hash algo out-of-band does not work. + #[test] + fn sha256_broken_padding() { + let broken_base64 = "fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8"; + // if padded with a trailing '=' + let expected_digest = + hex!("7e022bdd3c851830173f9faaa006a230a0e0fdad4c953e85bff4bf0da036e12f"); + + // passing hash algo out of band should succeed + let nix_hash = nixhash::from_str(&format!("sha256-{}", &broken_base64), Some("sha256")) + .expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest_as_bytes()); + + // not passing hash algo out of band should succeed + let nix_hash = + nixhash::from_str(&format!("sha256-{}", &broken_base64), None).expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest_as_bytes()); + + // not passing SRI, but hash algo out of band should fail + nixhash::from_str(broken_base64, Some("sha256")).expect_err("must fail"); + } + + /// As we decided to pass our hashes by trimming `=` completely, + /// we need to take into account hashes with padding requirements which + /// contains trailing bits which would be checked by `BASE64_NOPAD` and would + /// make the verification crash. + /// + /// This base64 has a trailing non-zero bit at bit 42. 
+ #[test] + fn sha256_weird_base64() { + let weird_base64 = "syceJMUEknBDCHK8eGs6rUU3IQn+HnQfURfCrDxYPa9="; + let expected_digest = + hex!("b3271e24c5049270430872bc786b3aad45372109fe1e741f5117c2ac3c583daf"); + + let nix_hash = nixhash::from_str(&format!("sha256-{}", &weird_base64), Some("sha256")) + .expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest_as_bytes()); + + // not passing hash algo out of band should succeed + let nix_hash = + nixhash::from_str(&format!("sha256-{}", &weird_base64), None).expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest_as_bytes()); + + // not passing SRI, but hash algo out of band should fail + nixhash::from_str(weird_base64, Some("sha256")).expect_err("must fail"); + } + + #[test] + fn serialize_deserialize() { + let nixhash_actual = NixHash::Sha256(hex!( + "b3271e24c5049270430872bc786b3aad45372109fe1e741f5117c2ac3c583daf" + )); + let nixhash_str_json = "\"sha256-syceJMUEknBDCHK8eGs6rUU3IQn+HnQfURfCrDxYPa8=\""; + + let serialized = serde_json::to_string(&nixhash_actual).expect("can serialize"); + + assert_eq!(nixhash_str_json, &serialized); + + let deserialized: NixHash = + serde_json::from_str(nixhash_str_json).expect("must deserialize"); + assert_eq!(&nixhash_actual, &deserialized); + } +} diff --git a/tvix/nix-compat/src/path_info.rs b/tvix/nix-compat/src/path_info.rs new file mode 100644 index 0000000000..f289ebde33 --- /dev/null +++ b/tvix/nix-compat/src/path_info.rs @@ -0,0 +1,121 @@ +use crate::{nixbase32, nixhash::NixHash, store_path::StorePathRef}; +use serde::{Deserialize, Serialize}; +use std::collections::BTreeSet; + +/// Represents information about a Store Path that Nix provides inside the build +/// if the exportReferencesGraph feature is used. +/// This is not to be confused with the format Nix uses in its `nix path-info` command. 
+/// It includes some more fields, like `registrationTime`, `signatures` and `ultimate`, +/// does not include the `closureSize` and encodes `narHash` as SRI. +#[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct ExportedPathInfo<'a> { + #[serde(rename = "closureSize")] + pub closure_size: u64, + + #[serde( + rename = "narHash", + serialize_with = "to_nix_nixbase32_string", + deserialize_with = "from_nix_nixbase32_string" + )] + pub nar_sha256: [u8; 32], + + #[serde(rename = "narSize")] + pub nar_size: u64, + + #[serde(borrow)] + pub path: StorePathRef<'a>, + + /// The list of other Store Paths this Store Path refers to. + /// StorePathRef does Ord by the nixbase32-encoded string repr, so this is correct. + pub references: BTreeSet<StorePathRef<'a>>, + // more recent versions of Nix also have a `valid: true` field here, Nix 2.3 doesn't, + // and nothing seems to use it. +} + +/// ExportedPathInfo are ordered by their `path` field. +impl Ord for ExportedPathInfo<'_> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path.cmp(&other.path) + } +} + +impl PartialOrd for ExportedPathInfo<'_> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +fn to_nix_nixbase32_string<S>(v: &[u8; 32], serializer: S) -> Result<S::Ok, S::Error> +where + S: serde::Serializer, +{ + let string = NixHash::Sha256(*v).to_nix_nixbase32_string(); + string.serialize(serializer) +} + +/// The length of a sha256 digest, nixbase32-encoded. 
+const NIXBASE32_SHA256_ENCODE_LEN: usize = nixbase32::encode_len(32); + +fn from_nix_nixbase32_string<'de, D>(deserializer: D) -> Result<[u8; 32], D::Error> +where + D: serde::Deserializer<'de>, +{ + let str: &'de str = Deserialize::deserialize(deserializer)?; + + let digest_str = str.strip_prefix("sha256:").ok_or_else(|| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(str), &"sha256:…") + })?; + + let digest_str: [u8; NIXBASE32_SHA256_ENCODE_LEN] = + digest_str.as_bytes().try_into().map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(str), &"valid digest len") + })?; + + let digest: [u8; 32] = nixbase32::decode_fixed(digest_str).map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(str), &"valid nixbase32") + })?; + + Ok(digest) +} + +#[cfg(test)] +mod tests { + use hex_literal::hex; + + use super::*; + + /// Ensure we can create the same JSON as the exportReferencesGraph feature + #[test] + fn serialize_deserialize() { + // JSON extracted from a build of + // stdenv.mkDerivation { name = "hello"; __structuredAttrs = true; exportReferencesGraph.blub = [ pkgs.hello ]; nativeBuildInputs = [pkgs.jq]; buildCommand = "jq -rc .blub $NIX_ATTRS_JSON_FILE > $out"; } + let pathinfos_str_json = 
r#"[{"closureSize":1828984,"narHash":"sha256:11vm2x1ajhzsrzw7lsyss51mmr3b6yll9wdjn51bh7liwkpc8ila","narSize":1828984,"path":"/nix/store/7n0mbqydcipkpbxm24fab066lxk68aqk-libunistring-1.1","references":["/nix/store/7n0mbqydcipkpbxm24fab066lxk68aqk-libunistring-1.1"]},{"closureSize":32696176,"narHash":"sha256:0alzbhjxdcsmr1pk7z0bdh46r2xpq3xs3k9y82bi4bx5pklcvw5x","narSize":226560,"path":"/nix/store/dbghhbq1x39yxgkv3vkgfwbxrmw9nfzi-hello-2.12.1","references":["/nix/store/dbghhbq1x39yxgkv3vkgfwbxrmw9nfzi-hello-2.12.1","/nix/store/ddwyrxif62r8n6xclvskjyy6szdhvj60-glibc-2.39-5"]},{"closureSize":32469616,"narHash":"sha256:1zw5p05fh0k836ybfxkskv8apcv2m3pm2wa6y90wqn5w5kjyj13c","narSize":30119936,"path":"/nix/store/ddwyrxif62r8n6xclvskjyy6szdhvj60-glibc-2.39-5","references":["/nix/store/ddwyrxif62r8n6xclvskjyy6szdhvj60-glibc-2.39-5","/nix/store/rxganm4ibf31qngal3j3psp20mak37yy-xgcc-13.2.0-libgcc","/nix/store/s32cldbh9pfzd9z82izi12mdlrw0yf8q-libidn2-2.3.7"]},{"closureSize":159560,"narHash":"sha256:10q8iyvfmpfck3yiisnj1j8vp6lq3km17r26sr95zpdf9mgmk69s","narSize":159560,"path":"/nix/store/rxganm4ibf31qngal3j3psp20mak37yy-xgcc-13.2.0-libgcc","references":[]},{"closureSize":2190120,"narHash":"sha256:1cv997nzxbd91jhmzwnhxa1ahlzp5ffli8m4a5npcq8zg0vb1kwg","narSize":361136,"path":"/nix/store/s32cldbh9pfzd9z82izi12mdlrw0yf8q-libidn2-2.3.7","references":["/nix/store/7n0mbqydcipkpbxm24fab066lxk68aqk-libunistring-1.1","/nix/store/s32cldbh9pfzd9z82izi12mdlrw0yf8q-libidn2-2.3.7"]}]"#; + + // We ensure it roundtrips (to check the sorting is correct) + let deserialized: BTreeSet<ExportedPathInfo> = + serde_json::from_str(pathinfos_str_json).expect("must serialize"); + + let serialized_again = serde_json::to_string(&deserialized).expect("must deserialize"); + assert_eq!(pathinfos_str_json, serialized_again); + + // Also compare one specific item to be populated as expected. 
+ assert_eq!( + &ExportedPathInfo { + closure_size: 1828984, + nar_sha256: hex!( + "8a46c4eee4911eb842b1b2f144a9376be45a43d1da6b7af8cffa43a942177587" + ), + nar_size: 1828984, + path: StorePathRef::from_bytes( + b"7n0mbqydcipkpbxm24fab066lxk68aqk-libunistring-1.1" + ) + .expect("must parse"), + references: BTreeSet::from_iter([StorePathRef::from_bytes( + b"7n0mbqydcipkpbxm24fab066lxk68aqk-libunistring-1.1" + ) + .unwrap()]), + }, + deserialized.first().unwrap() + ); + } +} diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs new file mode 100644 index 0000000000..707c41a92d --- /dev/null +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -0,0 +1,632 @@ +use crate::nixbase32; +use data_encoding::{DecodeError, BASE64}; +use serde::{Deserialize, Serialize}; +use std::{ + fmt, + path::PathBuf, + str::{self, FromStr}, +}; +use thiserror; + +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStringExt; + +mod utils; + +pub use utils::*; + +pub const DIGEST_SIZE: usize = 20; +pub const ENCODED_DIGEST_SIZE: usize = nixbase32::encode_len(DIGEST_SIZE); + +// The store dir prefix, without trailing slash. +// That's usually where the Nix store is mounted at. +pub const STORE_DIR: &str = "/nix/store"; +pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/"; + +/// Errors that can occur when parsing a literal store path +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum Error { + #[error("Dash is missing between hash and name")] + MissingDash, + #[error("Hash encoding is invalid: {0}")] + InvalidHashEncoding(#[from] DecodeError), + #[error("Invalid length")] + InvalidLength, + #[error( + "Invalid name: \"{}\", character at position {} is invalid", + std::str::from_utf8(.0).unwrap_or(&BASE64.encode(.0)), + .1, + )] + InvalidName(Vec<u8>, u8), + #[error("Tried to parse an absolute path which was missing the store dir prefix.")] + MissingStoreDir, +} + +/// Represents a path in the Nix store (a direct child of [STORE_DIR]). 
/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
///
/// It consists of a digest (20 bytes), and a name, which is a string.
/// The name may only contain ASCII alphanumeric, or one of the following
/// characters: `-`, `_`, `.`, `+`, `?`, `=`.
/// The name is usually used to describe the pname and version of a package.
/// Derivation paths can also be represented as store paths, their names just
/// end with the `.drv` suffix.
///
/// A [StorePath] does not encode any additional subpath "inside" the store
/// path.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct StorePath {
    /// The raw (decoded) digest, [DIGEST_SIZE] bytes long.
    digest: [u8; DIGEST_SIZE],
    /// The name part of the store path, i.e. what follows `$digest-`.
    name: Box<str>,
}
+ #[cfg(target_family = "unix")] + pub fn from_absolute_path_full(s: &str) -> Result<(StorePath, PathBuf), Error> { + // strip [STORE_DIR_WITH_SLASH] from s + match s.strip_prefix(STORE_DIR_WITH_SLASH) { + None => Err(Error::MissingStoreDir), + Some(rest) => { + // put rest in a PathBuf + let mut p = PathBuf::new(); + p.push(rest); + + let mut it = p.components(); + + // The first component of the rest must be parse-able as a [StorePath] + if let Some(first_component) = it.next() { + // convert first component to StorePath + let first_component_bytes = first_component.as_os_str().to_owned().into_vec(); + let store_path = StorePath::from_bytes(&first_component_bytes)?; + // collect rest + let rest_buf: PathBuf = it.collect(); + Ok((store_path, rest_buf)) + } else { + Err(Error::InvalidLength) // Well, or missing "/"? + } + } + } + } + + /// Returns an absolute store path string. + /// That is just the string representation, prefixed with the store prefix + /// ([STORE_DIR_WITH_SLASH]), + pub fn to_absolute_path(&self) -> String { + let sp_ref: StorePathRef = self.into(); + sp_ref.to_absolute_path() + } +} + +impl<'de> Deserialize<'de> for StorePath { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let r = <StorePathRef<'de> as Deserialize<'de>>::deserialize(deserializer)?; + Ok(r.to_owned()) + } +} + +impl Serialize for StorePath { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + let r: StorePathRef = self.into(); + r.serialize(serializer) + } +} + +/// Like [StorePath], but without a heap allocation for the name. +/// Used by [StorePath] for parsing. 
+/// +#[derive(Debug, Eq, PartialEq, Clone, Copy, Hash)] +pub struct StorePathRef<'a> { + digest: [u8; DIGEST_SIZE], + name: &'a str, +} + +impl<'a> From<&'a StorePath> for StorePathRef<'a> { + fn from(&StorePath { digest, ref name }: &'a StorePath) -> Self { + StorePathRef { digest, name } + } +} + +impl<'a> PartialOrd for StorePathRef<'a> { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +/// `StorePathRef`s are sorted by their reverse digest to match the sorting order +/// of the nixbase32-encoded string. +impl<'a> Ord for StorePathRef<'a> { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.digest.iter().rev().cmp(other.digest.iter().rev()) + } +} + +impl<'a> StorePathRef<'a> { + pub fn digest(&self) -> &[u8; DIGEST_SIZE] { + &self.digest + } + + pub fn name(&self) -> &'a str { + self.name + } + + pub fn to_owned(&self) -> StorePath { + StorePath { + digest: self.digest, + name: self.name.into(), + } + } + + /// Construct a [StorePathRef] from a name and digest. + /// The name is validated, and the digest checked for size. + pub fn from_name_and_digest(name: &'a str, digest: &[u8]) -> Result<Self, Error> { + let digest_fixed = digest.try_into().map_err(|_| Error::InvalidLength)?; + Self::from_name_and_digest_fixed(name, digest_fixed) + } + + /// Construct a [StorePathRef] from a name and digest of correct length. + /// The name is validated. + pub fn from_name_and_digest_fixed( + name: &'a str, + digest: [u8; DIGEST_SIZE], + ) -> Result<Self, Error> { + Ok(Self { + name: validate_name(name.as_bytes())?, + digest, + }) + } + + /// Construct a [StorePathRef] from an absolute store path string. + /// This is equivalent to calling [StorePathRef::from_bytes], but stripping + /// the [STORE_DIR_WITH_SLASH] prefix before. 
+ pub fn from_absolute_path(s: &'a [u8]) -> Result<Self, Error> { + match s.strip_prefix(STORE_DIR_WITH_SLASH.as_bytes()) { + Some(s_stripped) => Self::from_bytes(s_stripped), + None => Err(Error::MissingStoreDir), + } + } + + /// Construct a [StorePathRef] by passing the `$digest-$name` string + /// that comes after [STORE_DIR_WITH_SLASH]. + pub fn from_bytes(s: &'a [u8]) -> Result<Self, Error> { + // the whole string needs to be at least: + // + // - 32 characters (encoded hash) + // - 1 dash + // - 1 character for the name + if s.len() < ENCODED_DIGEST_SIZE + 2 { + Err(Error::InvalidLength)? + } + + let digest = nixbase32::decode_fixed(&s[..ENCODED_DIGEST_SIZE])?; + + if s[ENCODED_DIGEST_SIZE] != b'-' { + return Err(Error::MissingDash); + } + + Ok(StorePathRef { + digest, + name: validate_name(&s[ENCODED_DIGEST_SIZE + 1..])?, + }) + } + + /// Returns an absolute store path string. + /// That is just the string representation, prefixed with the store prefix + /// ([STORE_DIR_WITH_SLASH]), + pub fn to_absolute_path(&self) -> String { + format!("{}{}", STORE_DIR_WITH_SLASH, self) + } +} + +impl<'de: 'a, 'a> Deserialize<'de> for StorePathRef<'a> { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let string: &'de str = Deserialize::deserialize(deserializer)?; + let stripped: Option<&str> = string.strip_prefix(STORE_DIR_WITH_SLASH); + let stripped: &str = stripped.ok_or_else(|| { + serde::de::Error::invalid_value( + serde::de::Unexpected::Str(string), + &"store path prefix", + ) + })?; + StorePathRef::from_bytes(stripped.as_bytes()).map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(string), &"StorePath") + }) + } +} + +impl Serialize for StorePathRef<'_> { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + let string: String = self.to_absolute_path(); + string.serialize(serializer) + } +} + +/// NAME_CHARS contains `true` for bytes 
that are valid in store path names. +static NAME_CHARS: [bool; 256] = { + let mut tbl = [false; 256]; + let mut c = 0; + + loop { + tbl[c as usize] = matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'+' | b'-' | b'_' | b'?' | b'=' | b'.'); + + if c == u8::MAX { + break; + } + + c += 1; + } + + tbl +}; + +/// Checks a given &[u8] to match the restrictions for [StorePath::name], and +/// returns the name as string if successful. +pub(crate) fn validate_name(s: &(impl AsRef<[u8]> + ?Sized)) -> Result<&str, Error> { + let s = s.as_ref(); + + // Empty or excessively long names are not allowed. + if s.is_empty() || s.len() > 211 { + return Err(Error::InvalidLength); + } + + let mut valid = true; + for &c in s { + valid = valid && NAME_CHARS[c as usize]; + } + + if !valid { + for (i, &c) in s.iter().enumerate() { + if !NAME_CHARS[c as usize] { + return Err(Error::InvalidName(s.to_vec(), i as u8)); + } + } + + unreachable!(); + } + + // SAFETY: We permit a subset of ASCII, which guarantees valid UTF-8. + Ok(unsafe { str::from_utf8_unchecked(s) }) +} + +impl fmt::Display for StorePath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + StorePathRef::from(self).fmt(f) + } +} + +impl fmt::Display for StorePathRef<'_> { + /// The string representation of a store path starts with a digest (20 + /// bytes), [crate::nixbase32]-encoded, followed by a `-`, + /// and ends with the name. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name) + } +} + +#[cfg(test)] +mod tests { + use super::Error; + use std::cmp::Ordering; + use std::path::PathBuf; + + use crate::store_path::{StorePath, StorePathRef, DIGEST_SIZE}; + use hex_literal::hex; + use pretty_assertions::assert_eq; + use rstest::rstest; + use serde::Deserialize; + + #[derive(Deserialize)] + /// An example struct, holding a StorePathRef. + /// Used to test deserializing StorePathRef. 
+ struct Container<'a> { + #[serde(borrow)] + store_path: StorePathRef<'a>, + } + + #[test] + fn happy_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath = StorePath::from_bytes(example_nix_path_str.as_bytes()) + .expect("Error parsing example string"); + + let expected_digest: [u8; DIGEST_SIZE] = hex!("8a12321522fd91efbd60ebb2481af88580f61600"); + + assert_eq!("net-tools-1.60_p20170221182432", nixpath.name()); + assert_eq!(nixpath.digest, expected_digest); + + assert_eq!(example_nix_path_str, nixpath.to_string()) + } + + #[test] + fn store_path_ordering() { + let store_paths = [ + "/nix/store/0lk5dgi01r933abzfj9c9wlndg82yd3g-psutil-5.9.6.tar.gz.drv", + "/nix/store/1xj43bva89f9qmwm37zl7r3d7m67i9ck-shorttoc-1.3-tex.drv", + "/nix/store/2gb633czchi20jq1kqv70rx2yvvgins8-lifted-base-0.2.3.12.tar.gz.drv", + "/nix/store/2vksym3r3zqhp15q3fpvw2mnvffv11b9-docbook-xml-4.5.zip.drv", + "/nix/store/5q918awszjcz5720xvpc2czbg1sdqsf0-rust_renaming-0.1.0-lib", + "/nix/store/7jw30i342sr2p1fmz5xcfnch65h4zbd9-dbus-1.14.10.tar.xz.drv", + "/nix/store/96yqwqhnp3qya4rf4n0rcl0lwvrylp6k-eap8021x-222.40.1.tar.gz.drv", + "/nix/store/9gjqg36a1v0axyprbya1hkaylmnffixg-virtualenv-20.24.5.tar.gz.drv", + "/nix/store/a4i5mci2g9ada6ff7ks38g11dg6iqyb8-perl-5.32.1.drv", + "/nix/store/a5g76ljava4h5pxlggz3aqdhs3a4fk6p-ToolchainInfo.plist.drv", + "/nix/store/db46l7d6nswgz4ffp1mmd56vjf9g51v6-version.plist.drv", + "/nix/store/g6f7w20sd7vwy0rc1r4bfsw4ciclrm4q-crates-io-num_cpus-1.12.0.drv", + "/nix/store/iw82n1wwssb8g6772yddn8c3vafgv9np-bootstrap-stage1-sysctl-stdenv-darwin.drv", + "/nix/store/lp78d1y5wxpcn32d5c4r7xgbjwiw0cgf-logo.svg.drv", + "/nix/store/mf00ank13scv1f9l1zypqdpaawjhfr3s-python3.11-psutil-5.9.6.drv", + "/nix/store/mpfml61ra7pz90124jx9r3av0kvkz2w1-perl5.36.0-Encode-Locale-1.05", + "/nix/store/qhsvwx4h87skk7c4mx0xljgiy3z93i23-source.drv", + "/nix/store/riv7d73adim8hq7i04pr8kd0jnj93nav-fdk-aac-2.0.2.tar.gz.drv", + 
"/nix/store/s64b9031wga7vmpvgk16xwxjr0z9ln65-human-signals-5.0.0.tgz-extracted", + "/nix/store/w6svg3m2xdh6dhx0gl1nwa48g57d3hxh-thiserror-1.0.49", + ]; + + for w in store_paths.windows(2) { + if w.len() < 2 { + continue; + } + let (pa, _) = StorePath::from_absolute_path_full(w[0]).expect("parseable"); + let (pb, _) = StorePath::from_absolute_path_full(w[1]).expect("parseable"); + assert_eq!( + Ordering::Less, + pa.cmp(&pb), + "{:?} not less than {:?}", + w[0], + w[1] + ); + } + } + + /// This is the store path *accepted* when `nix-store --add`'ing an + /// empty `.gitignore` file. + /// + /// Nix 2.4 accidentally permitted this behaviour, but the revert came + /// too late to beat Hyrum's law. It is now considered permissible. + /// + /// https://github.com/NixOS/nix/pull/9095 (revert) + /// https://github.com/NixOS/nix/pull/9867 (revert-of-revert) + #[test] + fn starts_with_dot() { + StorePath::from_bytes(b"fli4bwscgna7lpm7v5xgnjxrxh0yc7ra-.gitignore") + .expect("must succeed"); + } + + #[test] + fn empty_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-").expect_err("must fail"); + } + + #[test] + fn excessive_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + .expect_err("must fail"); + } + + #[test] + fn invalid_hash_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn invalid_encoding_hash() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn more_than_just_the_bare_nix_store_path() { + StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp", + ) + .expect_err("must fail"); + } + + #[test] + fn 
no_dash_between_hash_and_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn absolute_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath_expected = + StorePathRef::from_bytes(example_nix_path_str.as_bytes()).expect("must parse"); + + let nixpath_actual = StorePathRef::from_absolute_path( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432".as_bytes(), + ) + .expect("must parse"); + + assert_eq!(nixpath_expected, nixpath_actual); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + nixpath_actual.to_absolute_path(), + ); + } + + #[test] + fn absolute_path_missing_prefix() { + assert_eq!( + Error::MissingStoreDir, + StorePathRef::from_absolute_path(b"foobar-123").expect_err("must fail") + ); + } + + #[test] + fn serialize_ref() { + let nixpath_actual = StorePathRef::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + ) + .expect("can parse"); + + let serialized = serde_json::to_string(&nixpath_actual).expect("can serialize"); + + assert_eq!( + "\"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432\"", + &serialized + ); + } + + #[test] + fn serialize_owned() { + let nixpath_actual = StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + ) + .expect("can parse"); + + let serialized = serde_json::to_string(&nixpath_actual).expect("can serialize"); + + assert_eq!( + "\"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432\"", + &serialized + ); + } + + #[test] + fn deserialize_ref() { + let store_path_str_json = + "\"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432\""; + + let store_path: StorePathRef<'_> = + serde_json::from_str(store_path_str_json).expect("valid json"); + + assert_eq!( + 
"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + store_path.to_absolute_path() + ); + } + + #[test] + fn deserialize_ref_container() { + let str_json = "{\"store_path\":\"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432\"}"; + + let container: Container<'_> = serde_json::from_str(str_json).expect("must deserialize"); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + container.store_path.to_absolute_path() + ); + } + + #[test] + fn deserialize_owned() { + let store_path_str_json = + "\"/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432\""; + + let store_path: StorePath = serde_json::from_str(store_path_str_json).expect("valid json"); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + store_path.to_absolute_path() + ); + } + + #[rstest] + #[case::without_prefix( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new())] + #[case::without_prefix_but_trailing_slash( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/", + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new())] + #[case::with_prefix( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp", + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp"))] + #[case::with_prefix_and_trailing_slash( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp/", + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp/"))] + fn from_absolute_path_full( + #[case] s: &str, + #[case] exp_store_path: StorePath, + #[case] exp_path: 
PathBuf, + ) { + let (actual_store_path, actual_path) = + StorePath::from_absolute_path_full(s).expect("must succeed"); + + assert_eq!(exp_store_path, actual_store_path); + assert_eq!(exp_path, actual_path); + } + + #[test] + fn from_absolute_path_errors() { + assert_eq!( + Error::InvalidLength, + StorePath::from_absolute_path_full("/nix/store/").expect_err("must fail") + ); + assert_eq!( + Error::InvalidLength, + StorePath::from_absolute_path_full("/nix/store/foo").expect_err("must fail") + ); + assert_eq!( + Error::MissingStoreDir, + StorePath::from_absolute_path_full( + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432" + ) + .expect_err("must fail") + ); + } +} diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs new file mode 100644 index 0000000000..d6f390db85 --- /dev/null +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -0,0 +1,293 @@ +use crate::nixbase32; +use crate::nixhash::{CAHash, NixHash}; +use crate::store_path::{Error, StorePathRef, STORE_DIR}; +use data_encoding::HEXLOWER; +use sha2::{Digest, Sha256}; +use thiserror; + +/// Errors that can occur when creating a content-addressed store path. +/// +/// This wraps the main [crate::store_path::Error].. +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum BuildStorePathError { + #[error("Invalid Store Path: {0}")] + InvalidStorePath(Error), + /// This error occurs when we have references outside the SHA-256 + + /// Recursive case. The restriction comes from upstream Nix. It may be + /// lifted at some point but there isn't a pressing need to anticipate that. + #[error("References were not supported as much as requested")] + InvalidReference(), +} + +/// compress_hash takes an arbitrarily long sequence of bytes (usually +/// a hash digest), and returns a sequence of bytes of length +/// OUTPUT_SIZE. 
+/// +/// It's calculated by rotating through the bytes in the output buffer +/// (zero- initialized), and XOR'ing with each byte of the passed +/// input. It consumes 1 byte at a time, and XOR's it with the current +/// value in the output buffer. +/// +/// This mimics equivalent functionality in C++ Nix. +pub fn compress_hash<const OUTPUT_SIZE: usize>(input: &[u8]) -> [u8; OUTPUT_SIZE] { + let mut output = [0; OUTPUT_SIZE]; + + for (ii, ch) in input.iter().enumerate() { + output[ii % OUTPUT_SIZE] ^= ch; + } + + output +} + +/// This builds a store path, by calculating the text_hash_string of either a +/// derivation or a literal text file that may contain references. +/// If you don't want to have to pass the entire contents, you might want to use +/// [build_ca_path] instead. +pub fn build_text_path<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>( + name: &str, + content: C, + references: I, +) -> Result<StorePathRef<'_>, BuildStorePathError> { + // produce the sha256 digest of the contents + let content_digest = Sha256::new_with_prefix(content).finalize().into(); + + build_ca_path(name, &CAHash::Text(content_digest), references, false) +} + +/// This builds a store path from a [CAHash] and a list of references. +pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>( + name: &'a str, + ca_hash: &CAHash, + references: I, + self_reference: bool, +) -> Result<StorePathRef<'a>, BuildStorePathError> { + // self references are only allowed for CAHash::Nar(NixHash::Sha256(_)). + if self_reference && matches!(ca_hash, CAHash::Nar(NixHash::Sha256(_))) { + return Err(BuildStorePathError::InvalidReference()); + } + + /// Helper function, used for the non-sha256 [CAHash::Nar] and all [CAHash::Flat]. 
+ fn fixed_out_digest(prefix: &str, hash: &NixHash) -> [u8; 32] { + Sha256::new_with_prefix(format!("{}:{}:", prefix, hash.to_nix_hex_string())) + .finalize() + .into() + } + + let (ty, inner_digest) = match &ca_hash { + CAHash::Text(ref digest) => (make_references_string("text", references, false), *digest), + CAHash::Nar(NixHash::Sha256(ref digest)) => ( + make_references_string("source", references, self_reference), + *digest, + ), + + // for all other CAHash::Nar, another custom scheme is used. + CAHash::Nar(ref hash) => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + + ( + "output:out".to_string(), + fixed_out_digest("fixed:out:r", hash), + ) + } + // CaHash::Flat is using something very similar, except the `r:` prefix. + CAHash::Flat(ref hash) => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + + ( + "output:out".to_string(), + fixed_out_digest("fixed:out", hash), + ) + } + }; + + build_store_path_from_fingerprint_parts(&ty, &inner_digest, name) + .map_err(BuildStorePathError::InvalidStorePath) +} + +/// For given NAR sha256 digest and name, return the new [StorePathRef] this +/// would have, or an error, in case the name is invalid. +pub fn build_nar_based_store_path<'a>( + nar_sha256_digest: &[u8; 32], + name: &'a str, +) -> Result<StorePathRef<'a>, BuildStorePathError> { + let nar_hash_with_mode = CAHash::Nar(NixHash::Sha256(nar_sha256_digest.to_owned())); + + build_ca_path(name, &nar_hash_with_mode, Vec::<String>::new(), false) +} + +/// This builds an input-addressed store path. +/// +/// Input-addresed store paths are always derivation outputs, the "input" in question is the +/// derivation and its closure. 
+pub fn build_output_path<'a>( + drv_sha256: &[u8; 32], + output_name: &str, + output_path_name: &'a str, +) -> Result<StorePathRef<'a>, Error> { + build_store_path_from_fingerprint_parts( + &(String::from("output:") + output_name), + drv_sha256, + output_path_name, + ) +} + +/// This builds a store path from fingerprint parts. +/// Usually, that function is used from [build_text_path] and +/// passed a "text hash string" (starting with "text:" as fingerprint), +/// but other fingerprints starting with "output:" are also used in Derivation +/// output path calculation. +/// +/// The fingerprint is hashed with sha256, and its digest is compressed to 20 +/// bytes. +/// Inside a StorePath, that digest is printed nixbase32-encoded +/// (32 characters). +fn build_store_path_from_fingerprint_parts<'a>( + ty: &str, + inner_digest: &[u8; 32], + name: &'a str, +) -> Result<StorePathRef<'a>, Error> { + let fingerprint = format!( + "{ty}:sha256:{}:{STORE_DIR}:{name}", + HEXLOWER.encode(inner_digest) + ); + // name validation happens in here. + StorePathRef::from_name_and_digest_fixed( + name, + compress_hash(&Sha256::new_with_prefix(fingerprint).finalize()), + ) +} + +/// This contains the Nix logic to create "text hash strings", which are used +/// in `builtins.toFile`, as well as in Derivation Path calculation. +/// +/// A text hash is calculated by concatenating the following fields, separated by a `:`: +/// +/// - text +/// - references, individually joined by `:` +/// - the nix_hash_string representation of the sha256 digest of some contents +/// - the value of `storeDir` +/// - the name +fn make_references_string<S: AsRef<str>, I: IntoIterator<Item = S>>( + ty: &str, + references: I, + self_ref: bool, +) -> String { + let mut s = String::from(ty); + + for reference in references { + s.push(':'); + s.push_str(reference.as_ref()); + } + + if self_ref { + s.push_str(":self"); + } + + s +} + +/// Nix placeholders (i.e. 
/// values returned by `builtins.placeholder`)
/// are used to populate outputs with paths that must be
/// string-replaced with the actual output paths later, at runtime.
///
/// The actual placeholder is basically just a SHA256 hash encoded in
/// cppnix format: a `/`, followed by the nixbase32 encoding of the SHA-256
/// digest of `nix-output:$name`.
pub fn hash_placeholder(name: &str) -> String {
    let digest = Sha256::new_with_prefix(format!("nix-output:{}", name)).finalize();

    format!("/{}", nixbase32::encode(&digest))
}

#[cfg(test)]
mod test {
    use hex_literal::hex;

    use super::*;
    use crate::nixhash::{CAHash, NixHash};

    #[test]
    fn build_text_path_with_zero_references() {
        // This hash should match `builtins.toFile`, e.g.:
        //
        // nix-repl> builtins.toFile "foo" "bar"
        // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo"

        let store_path = build_text_path("foo", "bar", Vec::<String>::new())
            .expect("build_store_path() should succeed");

        assert_eq!(
            store_path.to_absolute_path().as_str(),
            "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo"
        );
    }

    #[test]
    fn build_text_path_with_non_zero_references() {
        // This hash should match:
        //
        // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}"
        // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"

        let inner = build_text_path("foo", "bar", Vec::<String>::new())
            .expect("path_with_references() should succeed");
        let inner_path = inner.to_absolute_path();

        let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()])
            .expect("path_with_references() should succeed");

        assert_eq!(
            outer.to_absolute_path().as_str(),
            "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
        );
    }

    #[test]
    fn build_sha1_path() {
        // A SHA-1 NAR hash takes the `fixed:out:r` route of build_ca_path.
        let outer = build_ca_path(
            "bar",
            &CAHash::Nar(NixHash::Sha1(hex!(
                "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"
            ))),
            Vec::<String>::new(),
            false,
        )
        .expect("path_with_references() should succeed");

        assert_eq!(
            outer.to_absolute_path().as_str(),
            "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"
        );
    }

    #[test]
    fn build_store_path_with_non_zero_references() {
        // This hash should match:
        //
        // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}"
        // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz"
        //
        // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz
        // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz'
        let outer = build_ca_path(
            "baz",
            &CAHash::Nar(NixHash::Sha256(
                nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1")
                    .expect("nixbase32 should decode")
                    .try_into()
                    .expect("should have right len"),
            )),
            vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"],
            false,
        )
        .expect("path_with_references() should succeed");

        assert_eq!(
            outer.to_absolute_path().as_str(),
            "/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz"
        );
    }
}
diff --git a/tvix/nix-compat/src/wire/bytes/mod.rs b/tvix/nix-compat/src/wire/bytes/mod.rs
new file mode 100644
index 0000000000..2ed071e379
--- /dev/null
+++ b/tvix/nix-compat/src/wire/bytes/mod.rs
@@ -0,0 +1,283 @@
use std::{
    io::{Error, ErrorKind},
    mem::MaybeUninit,
    ops::RangeInclusive,
};
use tokio::io::{self, AsyncReadExt, AsyncWriteExt, ReadBuf};

pub(crate) mod reader;
pub use reader::BytesReader;
mod writer;
pub use writer::BytesWriter;

/// 8 null bytes, used to write out padding.
const EMPTY_BYTES: &[u8; 8] = &[0u8; 8];

/// The length of the size field, in bytes. It is always 8.
const LEN_SIZE: usize = 8;

/// Read a "bytes wire packet" from the AsyncRead.
/// Rejects reading more than `allowed_size` bytes of payload.
///
/// The packet is made up of three parts:
/// - a length header, u64, LE-encoded
/// - the payload itself
/// - null bytes to the next 8 byte boundary
///
/// Ensures the payload size fits into the `allowed_size` passed,
/// and that the padding is actual null bytes.
+/// +/// On success, the returned `Vec<u8>` only contains the payload itself. +/// On failure (for example if a too large byte packet was sent), the reader +/// becomes unusable. +/// +/// This buffers the entire payload into memory, +/// a streaming version is available at [crate::wire::bytes::BytesReader]. +pub async fn read_bytes<R: ?Sized>( + r: &mut R, + allowed_size: RangeInclusive<usize>, +) -> io::Result<Vec<u8>> +where + R: AsyncReadExt + Unpin, +{ + // read the length field + let len = r.read_u64_le().await?; + let len: usize = len + .try_into() + .ok() + .filter(|len| allowed_size.contains(len)) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + "signalled package size not in allowed range", + ) + })?; + + // calculate the total length, including padding. + // byte packets are padded to 8 byte blocks each. + let padded_len = padding_len(len as u64) as u64 + (len as u64); + let mut limited_reader = r.take(padded_len); + + let mut buf = Vec::new(); + + let s = limited_reader.read_to_end(&mut buf).await?; + + // make sure we got exactly the number of bytes, and not less. + if s as u64 != padded_len { + return Err(io::ErrorKind::UnexpectedEof.into()); + } + + let (_content, padding) = buf.split_at(len); + + // ensure the padding is all zeroes. 
+ if padding.iter().any(|&b| b != 0) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "padding is not all zeroes", + )); + } + + // return the data without the padding + buf.truncate(len); + Ok(buf) +} + +pub(crate) async fn read_bytes_buf<'a, const N: usize, R: ?Sized>( + reader: &mut R, + buf: &'a mut [MaybeUninit<u8>; N], + allowed_size: RangeInclusive<usize>, +) -> io::Result<&'a [u8]> +where + R: AsyncReadExt + Unpin, +{ + assert_eq!(N % 8, 0); + assert!(*allowed_size.end() <= N); + + let len = reader.read_u64_le().await?; + let len: usize = len + .try_into() + .ok() + .filter(|len| allowed_size.contains(len)) + .ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + "signalled package size not in allowed range", + ) + })?; + + let buf_len = (len + 7) & !7; + let buf = { + let mut read_buf = ReadBuf::uninit(&mut buf[..buf_len]); + + while read_buf.filled().len() < buf_len { + reader.read_buf(&mut read_buf).await?; + } + + // ReadBuf::filled does not pass the underlying buffer's lifetime through, + // so we must make a trip to hell. + // + // SAFETY: `read_buf` is filled up to `buf_len`, and we verify that it is + // still pointing at the same underlying buffer. + unsafe { + assert_eq!(read_buf.filled().as_ptr(), buf.as_ptr() as *const u8); + assume_init_bytes(&buf[..buf_len]) + } + }; + + if buf[len..buf_len].iter().any(|&b| b != 0) { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "padding is not all zeroes", + )); + } + + Ok(&buf[..len]) +} + +/// SAFETY: The bytes have to actually be initialized. +unsafe fn assume_init_bytes(slice: &[MaybeUninit<u8>]) -> &[u8] { + &*(slice as *const [MaybeUninit<u8>] as *const [u8]) +} + +/// Read a "bytes wire packet" of from the AsyncRead and tries to parse as string. +/// Internally uses [read_bytes]. +/// Rejects reading more than `allowed_size` bytes of payload. 
pub async fn read_string<R>(r: &mut R, allowed_size: RangeInclusive<usize>) -> io::Result<String>
where
    R: AsyncReadExt + Unpin,
{
    let bytes = read_bytes(r, allowed_size).await?;
    // Payloads that are not valid UTF-8 are reported as InvalidData.
    String::from_utf8(bytes).map_err(|e| Error::new(ErrorKind::InvalidData, e))
}

/// Writes a "bytes wire packet" to a (hopefully buffered) [AsyncWriteExt].
///
/// Accepts anything implementing AsRef<[u8]> as payload.
///
/// See [read_bytes] for a description of the format.
///
/// Note: if performance matters to you, make sure your
/// [AsyncWriteExt] handle is buffered. This function is quite
/// write-intensive.
pub async fn write_bytes<W: AsyncWriteExt + Unpin, B: AsRef<[u8]>>(
    w: &mut W,
    b: B,
) -> io::Result<()> {
    // write the size packet.
    w.write_u64_le(b.as_ref().len() as u64).await?;

    // write the payload
    w.write_all(b.as_ref()).await?;

    // write padding if needed
    let padding_len = padding_len(b.as_ref().len() as u64) as usize;
    if padding_len != 0 {
        w.write_all(&EMPTY_BYTES[..padding_len]).await?;
    }
    Ok(())
}

/// Computes the number of bytes we should add to len (a length in
/// bytes) to be aligned on 64 bits (8 bytes).
///
/// Uses wrapping arithmetic, so lengths close to `u64::MAX` do not
/// overflow.
fn padding_len(len: u64) -> u8 {
    let aligned = len.wrapping_add(7) & !7;
    aligned.wrapping_sub(len) as u8
}

#[cfg(test)]
mod tests {
    use tokio_test::{assert_ok, io::Builder};

    use super::*;
    use hex_literal::hex;

    /// The maximum length of bytes packets we're willing to accept in the test
    /// cases.
    const MAX_LEN: usize = 1024;

    #[tokio::test]
    async fn test_read_8_bytes() {
        let mut mock = Builder::new()
            .read(&8u64.to_le_bytes())
            .read(&12345678u64.to_le_bytes())
            .build();

        assert_eq!(
            &12345678u64.to_le_bytes(),
            read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice()
        );
    }

    #[tokio::test]
    async fn test_read_9_bytes() {
        // 9 bytes of payload, followed by 7 bytes of padding.
        let mut mock = Builder::new()
            .read(&9u64.to_le_bytes())
            .read(&hex!("01020304050607080900000000000000"))
            .build();

        assert_eq!(
            hex!("010203040506070809"),
            read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice()
        );
    }

    #[tokio::test]
    async fn test_read_0_bytes() {
        // An empty byte packet is essentially just the 0 length field.
        // No data is read, and there's zero padding.
        let mut mock = Builder::new().read(&0u64.to_le_bytes()).build();

        assert_eq!(
            hex!(""),
            read_bytes(&mut mock, 0..=MAX_LEN).await.unwrap().as_slice()
        );
    }

    #[tokio::test]
    /// Ensure we don't read any further than the size field if the length
    /// doesn't match the range we want to accept.
    async fn test_read_reject_too_large() {
        let mut mock = Builder::new().read(&100u64.to_le_bytes()).build();

        read_bytes(&mut mock, 10..=10)
            .await
            .expect_err("expect this to fail");
    }

    #[tokio::test]
    async fn test_write_bytes_no_padding() {
        let input = hex!("6478696f34657661");
        let len = input.len() as u64;
        let mut mock = Builder::new()
            .write(&len.to_le_bytes())
            .write(&input)
            .build();
        assert_ok!(write_bytes(&mut mock, &input).await)
    }
    #[tokio::test]
    async fn test_write_bytes_with_padding() {
        let input = hex!("322e332e3137");
        let len = input.len() as u64;
        let mut mock = Builder::new()
            .write(&len.to_le_bytes())
            .write(&hex!("322e332e31370000"))
            .build();
        assert_ok!(write_bytes(&mut mock, &input).await)
    }

    #[tokio::test]
    async fn test_write_string() {
        let input = "Hello, World!";
        let len = input.len() as u64;
        let mut mock = Builder::new()
            .write(&len.to_le_bytes())
            .write(&hex!("48656c6c6f2c20576f726c6421000000"))
            .build();
        assert_ok!(write_bytes(&mut mock, &input).await)
    }

    #[test]
    fn padding_len_u64_max() {
        assert_eq!(padding_len(u64::MAX), 1);
    }
}
diff --git a/tvix/nix-compat/src/wire/bytes/reader/mod.rs b/tvix/nix-compat/src/wire/bytes/reader/mod.rs
new file mode 100644
index 0000000000..6bd376c06f
--- /dev/null
+++ b/tvix/nix-compat/src/wire/bytes/reader/mod.rs
@@ -0,0 +1,684 @@
use std::{
    future::Future,
    io,
    num::NonZeroU64,
    ops::RangeBounds,
    pin::Pin,
    task::{self, ready, Poll},
};
use tokio::io::{AsyncBufRead, AsyncRead, AsyncReadExt, ReadBuf};

use trailer::{read_trailer, ReadTrailer, Trailer};

#[doc(hidden)]
pub use self::trailer::Pad;
pub(crate) use self::trailer::Tag;
mod trailer;

/// Reads a "bytes wire packet" from the underlying reader.
/// The format is the same as in [crate::wire::bytes::read_bytes],
/// however this structure provides an [AsyncRead] interface,
/// so the entire payload does not have to be passed around in memory.
+/// +/// It is constructed by reading a size with [BytesReader::new], +/// and yields payload data until the end of the packet is reached. +/// +/// It will not return the final bytes before all padding has been successfully +/// consumed as well, but the full length of the reader must be consumed. +/// +/// If the data is not read all the way to the end, or an error is encountered, +/// the underlying reader is no longer usable and might return garbage. +#[derive(Debug)] +#[allow(private_bounds)] +pub struct BytesReader<R, T: Tag = Pad> { + state: State<R, T>, +} + +/// Split the `user_len` into `body_len` and `tail_len`, which are respectively +/// the non-terminal 8-byte blocks, and the ≤8 bytes of user data contained in +/// the trailer block. +#[inline(always)] +fn split_user_len(user_len: NonZeroU64) -> (u64, u8) { + let n = user_len.get() - 1; + let body_len = n & !7; + let tail_len = (n & 7) as u8 + 1; + (body_len, tail_len) +} + +#[derive(Debug)] +enum State<R, T: Tag> { + /// Full 8-byte blocks are being read and released to the caller. + /// NOTE: The final 8-byte block is *always* part of the trailer. + Body { + reader: Option<R>, + consumed: u64, + /// The total length of all user data contained in both the body and trailer. + user_len: NonZeroU64, + }, + /// The trailer is in the process of being read. + ReadTrailer(ReadTrailer<R, T>), + /// The trailer has been fully read and validated, + /// and data can now be released to the caller. + ReleaseTrailer { consumed: u8, data: Trailer }, +} + +impl<R> BytesReader<R> +where + R: AsyncRead + Unpin, +{ + /// Constructs a new BytesReader, using the underlying passed reader. + pub async fn new<S: RangeBounds<u64>>(reader: R, allowed_size: S) -> io::Result<Self> { + BytesReader::new_internal(reader, allowed_size).await + } +} + +#[allow(private_bounds)] +impl<R, T: Tag> BytesReader<R, T> +where + R: AsyncRead + Unpin, +{ + /// Constructs a new BytesReader, using the underlying passed reader. 
+ pub(crate) async fn new_internal<S: RangeBounds<u64>>( + mut reader: R, + allowed_size: S, + ) -> io::Result<Self> { + let size = reader.read_u64_le().await?; + + if !allowed_size.contains(&size) { + return Err(io::Error::new(io::ErrorKind::InvalidData, "invalid size")); + } + + Ok(Self { + state: match NonZeroU64::new(size) { + Some(size) => State::Body { + reader: Some(reader), + consumed: 0, + user_len: size, + }, + None => State::ReleaseTrailer { + consumed: 0, + data: read_trailer::<R, T>(reader, 0).await?, + }, + }, + }) + } + + /// Returns whether there is any remaining data to be read. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Remaining data length, ie not including data already read. + /// + /// If the size has not been read yet, this is [None]. + pub fn len(&self) -> u64 { + match self.state { + State::Body { + consumed, user_len, .. + } => user_len.get() - consumed, + State::ReadTrailer(ref fut) => fut.len() as u64, + State::ReleaseTrailer { consumed, ref data } => data.len() as u64 - consumed as u64, + } + } +} + +#[allow(private_bounds)] +impl<R: AsyncRead + Unpin, T: Tag> AsyncRead for BytesReader<R, T> { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut task::Context, + buf: &mut ReadBuf, + ) -> Poll<io::Result<()>> { + let this = &mut self.state; + + loop { + match this { + State::Body { + reader, + consumed, + user_len, + } => { + let (body_len, tail_len) = split_user_len(*user_len); + let remaining = body_len - *consumed; + + let reader = if remaining == 0 { + let reader = reader.take().unwrap(); + *this = State::ReadTrailer(read_trailer(reader, tail_len)); + continue; + } else { + Pin::new(reader.as_mut().unwrap()) + }; + + let mut bytes_read = 0; + ready!(with_limited(buf, remaining, |buf| { + let ret = reader.poll_read(cx, buf); + bytes_read = buf.initialized().len(); + ret + }))?; + + *consumed += bytes_read as u64; + + return if bytes_read != 0 { + Ok(()) + } else { + Err(io::ErrorKind::UnexpectedEof.into()) + } 
+ .into(); + } + State::ReadTrailer(fut) => { + *this = State::ReleaseTrailer { + consumed: 0, + data: ready!(Pin::new(fut).poll(cx))?, + }; + } + State::ReleaseTrailer { consumed, data } => { + let data = &data[*consumed as usize..]; + let data = &data[..usize::min(data.len(), buf.remaining())]; + + buf.put_slice(data); + *consumed += data.len() as u8; + + return Ok(()).into(); + } + } + } + } +} + +#[allow(private_bounds)] +impl<R: AsyncBufRead + Unpin, T: Tag> AsyncBufRead for BytesReader<R, T> { + fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<io::Result<&[u8]>> { + let this = &mut self.get_mut().state; + + loop { + match this { + // This state comes *after* the following case, + // but we can't keep it in logical order because + // that would lengthen the borrow lifetime. + State::Body { + reader, + consumed, + user_len, + } if { + let (body_len, _) = split_user_len(*user_len); + let remaining = body_len - *consumed; + + remaining == 0 + } => + { + let reader = reader.take().unwrap(); + let (_, tail_len) = split_user_len(*user_len); + + *this = State::ReadTrailer(read_trailer(reader, tail_len)); + } + State::Body { + reader, + consumed, + user_len, + } => { + let (body_len, _) = split_user_len(*user_len); + let remaining = body_len - *consumed; + + let reader = Pin::new(reader.as_mut().unwrap()); + + match ready!(reader.poll_fill_buf(cx))? 
/// Make a limited version of `buf`, consisting only of up to `n` bytes of the unfilled section, and call `f` with it.
/// After `f` returns, we propagate the filled cursor advancement back to `buf`.
///
/// If `n` does not fit into a `usize`, the limit falls back to `usize::MAX`.
/// Whatever `f` returns is passed through to the caller unchanged.
///
/// # Panics
///
/// Panics if, after `f` returns, the limited buffer no longer starts at the
/// address it started at before the call.
fn with_limited<R>(buf: &mut ReadBuf, n: u64, f: impl FnOnce(&mut ReadBuf) -> R) -> R {
    let mut nbuf = buf.take(n.try_into().unwrap_or(usize::MAX));
    // Remember where the limited buffer starts, to detect `f` replacing it.
    let ptr = nbuf.initialized().as_ptr();
    let ret = f(&mut nbuf);

    // SAFETY: `ReadBuf::take` only returns the *unfilled* section of `buf`,
    // so anything filled is new, initialized data.
    //
    // We verify that `nbuf` still points to the same buffer,
    // so we're sure it hasn't been swapped out.
    unsafe {
        // ensure our buffer hasn't been swapped out
        assert_eq!(nbuf.initialized().as_ptr(), ptr);

        // Mirror the progress made on the limited buffer onto the caller's buffer.
        let n = nbuf.filled().len();
        buf.assume_init(n);
        buf.advance(n);
    }

    ret
}
+ #[rstest] + #[case::empty(&[])] // empty bytes packet + #[case::size_1b(&[0xff])] // 1 bytes payload + #[case::size_8b(&hex!("0001020304050607"))] // 8 bytes payload (no padding) + #[case::size_9b(&hex!("000102030405060708"))] // 9 bytes payload (7 bytes padding) + #[case::size_1m(LARGE_PAYLOAD.as_slice())] // larger bytes packet + #[tokio::test] + async fn read_payload_correct_readbuf(#[case] payload: &[u8]) { + let mut mock = BufReader::new( + Builder::new() + .read(&produce_packet_bytes(payload).await) + .build(), + ); + + let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64) + .await + .unwrap(); + + let mut buf = Vec::new(); + tokio::io::copy_buf(&mut r, &mut buf) + .await + .expect("copy_buf must succeed"); + + assert_eq!(payload, &buf[..]); + } + + /// Fail if the bytes packet is larger than allowed + #[tokio::test] + async fn read_bigger_than_allowed_fail() { + let payload = LARGE_PAYLOAD.as_slice(); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet + .build(); + + assert_eq!( + BytesReader::new(&mut mock, ..2048) + .await + .unwrap_err() + .kind(), + io::ErrorKind::InvalidData + ); + } + + /// Fail if the bytes packet is smaller than allowed + #[tokio::test] + async fn read_smaller_than_allowed_fail() { + let payload = &[0x00, 0x01, 0x02]; + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[0..8]) // We stop reading after the size packet + .build(); + + assert_eq!( + BytesReader::new(&mut mock, 1024..2048) + .await + .unwrap_err() + .kind(), + io::ErrorKind::InvalidData + ); + } + + /// Read the trailer immediately if there is no payload. + #[tokio::test] + async fn read_trailer_immediately() { + use crate::nar::wire::PadPar; + + let mut mock = Builder::new() + .read(&[0; 8]) + .read(&PadPar::PATTERN[8..]) + .build(); + + BytesReader::<_, PadPar>::new_internal(&mut mock, ..) 
    /// Start a 9 bytes payload packet, but have the underlying reader return
    /// EOF in the middle of the size packet (after 4 bytes).
    /// Constructing the reader must already fail with an unexpected EOF
    /// error, as the size field cannot be read completely.
    #[tokio::test]
    async fn read_9b_eof_during_size() {
        let payload = &hex!("FF0102030405060708");
        // Only the first 4 bytes of the 8-byte size field are made available.
        let mut mock = Builder::new()
            .read(&produce_packet_bytes(payload).await[..4])
            .build();

        assert_eq!(
            BytesReader::new(&mut mock, ..MAX_LEN)
                .await
                .expect_err("must fail")
                .kind(),
            io::ErrorKind::UnexpectedEof
        );
    }
+ /// We should get an unexpected EOF error, after reading the first 4 bytes + /// (successfully). + #[tokio::test] + async fn read_9b_eof_during_payload() { + let payload = &hex!("FF0102030405060708"); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..8 + 4]) + .build(); + + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); + let mut buf = [0; 9]; + + r.read_exact(&mut buf[..4]).await.expect("must succeed"); + + assert_eq!( + r.read_exact(&mut buf[4..=4]) + .await + .expect_err("must fail") + .kind(), + std::io::ErrorKind::UnexpectedEof + ); + } + + /// Start a 9 bytes payload packet, but don't supply the necessary padding. + /// This is expected to always fail before returning the final data. + #[rstest] + #[case::before_padding(8 + 9)] + #[case::during_padding(8 + 9 + 2)] + #[case::after_padding(8 + 9 + padding_len(9) as usize - 1)] + #[tokio::test] + async fn read_9b_eof_after_payload(#[case] offset: usize) { + let payload = &hex!("FF0102030405060708"); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..offset]) + .build(); + + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); + + // read_exact of the payload *body* will succeed, but a subsequent read will + // return UnexpectedEof error. + assert_eq!(r.read_exact(&mut [0; 8]).await.unwrap(), 8); + assert_eq!( + r.read_exact(&mut [0]).await.unwrap_err().kind(), + std::io::ErrorKind::UnexpectedEof + ); + } + + /// Start a 9 bytes payload packet, but return an error after a certain position. + /// Ensure that error is propagated. 
+ #[rstest] + #[case::during_size(4)] + #[case::before_payload(8)] + #[case::during_payload(8 + 4)] + #[case::before_padding(8 + 4)] + #[case::during_padding(8 + 9 + 2)] + #[tokio::test] + async fn propagate_error_from_reader(#[case] offset: usize) { + let payload = &hex!("FF0102030405060708"); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..offset]) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + + // Either length reading or data reading can fail, depending on which test case we're in. + let err: io::Error = async { + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?; + let mut buf = Vec::new(); + + r.read_to_end(&mut buf).await?; + + Ok(()) + } + .await + .expect_err("must fail"); + + assert_eq!( + err.kind(), + std::io::ErrorKind::Other, + "error kind must match" + ); + + assert_eq!( + err.into_inner().unwrap().to_string(), + "foo", + "error payload must contain foo" + ); + } + + /// Start a 9 bytes payload packet, but return an error after a certain position. + /// Ensure that error is propagated (AsyncReadBuf case) + #[rstest] + #[case::during_size(4)] + #[case::before_payload(8)] + #[case::during_payload(8 + 4)] + #[case::before_padding(8 + 4)] + #[case::during_padding(8 + 9 + 2)] + #[tokio::test] + async fn propagate_error_from_reader_buffered(#[case] offset: usize) { + let payload = &hex!("FF0102030405060708"); + let mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..offset]) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + let mut mock = BufReader::new(mock); + + // Either length reading or data reading can fail, depending on which test case we're in. 
+ let err: io::Error = async { + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await?; + let mut buf = Vec::new(); + + tokio::io::copy_buf(&mut r, &mut buf).await?; + + Ok(()) + } + .await + .expect_err("must fail"); + + assert_eq!( + err.kind(), + std::io::ErrorKind::Other, + "error kind must match" + ); + + assert_eq!( + err.into_inner().unwrap().to_string(), + "foo", + "error payload must contain foo" + ); + } + + /// If there's an error right after the padding, we don't propagate it, as + /// we're done reading. We just return EOF. + #[tokio::test] + async fn no_error_after_eof() { + let payload = &hex!("FF0102030405060708"); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); + let mut buf = Vec::new(); + + r.read_to_end(&mut buf).await.expect("must succeed"); + assert_eq!(buf.as_slice(), payload); + } + + /// If there's an error right after the padding, we don't propagate it, as + /// we're done reading. We just return EOF. + #[tokio::test] + async fn no_error_after_eof_buffered() { + let payload = &hex!("FF0102030405060708"); + let mock = Builder::new() + .read(&produce_packet_bytes(payload).await) + .read_error(std::io::Error::new(std::io::ErrorKind::Other, "foo")) + .build(); + let mut mock = BufReader::new(mock); + + let mut r = BytesReader::new(&mut mock, ..MAX_LEN).await.unwrap(); + let mut buf = Vec::new(); + + tokio::io::copy_buf(&mut r, &mut buf) + .await + .expect("must succeed"); + assert_eq!(buf.as_slice(), payload); + } + + /// Introduce various stalls in various places of the packet, to ensure we + /// handle these cases properly, too. 
+ #[rstest] + #[case::beginning(0)] + #[case::before_payload(8)] + #[case::during_payload(8 + 4)] + #[case::before_padding(8 + 4)] + #[case::during_padding(8 + 9 + 2)] + #[tokio::test] + async fn read_payload_correct_pending(#[case] offset: usize) { + let payload = &hex!("FF0102030405060708"); + let mut mock = Builder::new() + .read(&produce_packet_bytes(payload).await[..offset]) + .wait(Duration::from_nanos(0)) + .read(&produce_packet_bytes(payload).await[offset..]) + .build(); + + let mut r = BytesReader::new(&mut mock, ..=LARGE_PAYLOAD.len() as u64) + .await + .unwrap(); + let mut buf = Vec::new(); + r.read_to_end(&mut buf).await.expect("must succeed"); + + assert_eq!(payload, &buf[..]); + } +} diff --git a/tvix/nix-compat/src/wire/bytes/reader/trailer.rs b/tvix/nix-compat/src/wire/bytes/reader/trailer.rs new file mode 100644 index 0000000000..3a5bb75e71 --- /dev/null +++ b/tvix/nix-compat/src/wire/bytes/reader/trailer.rs @@ -0,0 +1,197 @@ +use std::{ + fmt::Debug, + future::Future, + marker::PhantomData, + ops::Deref, + pin::Pin, + task::{self, ready, Poll}, +}; + +use tokio::io::{self, AsyncRead, ReadBuf}; + +/// Trailer represents up to 8 bytes of data read as part of the trailer block(s) +#[derive(Debug)] +pub(crate) struct Trailer { + data_len: u8, + buf: [u8; 8], +} + +impl Deref for Trailer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.buf[..self.data_len as usize] + } +} + +/// Tag defines a "trailer tag": specific, fixed bytes that must follow wire data. +pub(crate) trait Tag { + /// The expected suffix + /// + /// The first 8 bytes may be ignored, and it must be an 8-byte aligned size. + const PATTERN: &'static [u8]; + + /// Suitably sized buffer for reading [Self::PATTERN] + /// + /// HACK: This is a workaround for const generics limitations. 
+ type Buf: AsRef<[u8]> + AsMut<[u8]> + Debug + Unpin; + + /// Make an instance of [Self::Buf] + fn make_buf() -> Self::Buf; +} + +#[derive(Debug)] +pub enum Pad {} + +impl Tag for Pad { + const PATTERN: &'static [u8] = &[0; 8]; + + type Buf = [u8; 8]; + + fn make_buf() -> Self::Buf { + [0; 8] + } +} + +#[derive(Debug)] +pub(crate) struct ReadTrailer<R, T: Tag> { + reader: R, + data_len: u8, + filled: u8, + buf: T::Buf, + _phantom: PhantomData<fn(T) -> T>, +} + +/// read_trailer returns a [Future] that reads a trailer with a given [Tag] from `reader` +pub(crate) fn read_trailer<R: AsyncRead + Unpin, T: Tag>( + reader: R, + data_len: u8, +) -> ReadTrailer<R, T> { + assert!(data_len <= 8, "payload in trailer must be <= 8 bytes"); + + let buf = T::make_buf(); + assert_eq!(buf.as_ref().len(), T::PATTERN.len()); + assert_eq!(T::PATTERN.len() % 8, 0); + + ReadTrailer { + reader, + data_len, + filled: if data_len != 0 { 0 } else { 8 }, + buf, + _phantom: PhantomData, + } +} + +impl<R, T: Tag> ReadTrailer<R, T> { + pub fn len(&self) -> u8 { + self.data_len + } +} + +impl<R: AsyncRead + Unpin, T: Tag> Future for ReadTrailer<R, T> { + type Output = io::Result<Trailer>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<Self::Output> { + let this = &mut *self; + + loop { + if this.filled >= this.data_len { + let check_range = || this.data_len as usize..this.filled as usize; + + if this.buf.as_ref()[check_range()] != T::PATTERN[check_range()] { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "invalid trailer", + )) + .into(); + } + } + + if this.filled as usize == T::PATTERN.len() { + let mut buf = [0; 8]; + buf.copy_from_slice(&this.buf.as_ref()[..8]); + + return Ok(Trailer { + data_len: this.data_len, + buf, + }) + .into(); + } + + let mut buf = ReadBuf::new(this.buf.as_mut()); + buf.advance(this.filled as usize); + + ready!(Pin::new(&mut this.reader).poll_read(cx, &mut buf))?; + + this.filled = { + let filled = buf.filled().len() as u8; + + 
if filled == this.filled { + return Err(io::ErrorKind::UnexpectedEof.into()).into(); + } + + filled + }; + } + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + + #[tokio::test] + async fn unexpected_eof() { + let reader = tokio_test::io::Builder::new() + .read(&[0xed]) + .wait(Duration::ZERO) + .read(&[0xef, 0x00]) + .build(); + + assert_eq!( + read_trailer::<_, Pad>(reader, 2).await.unwrap_err().kind(), + io::ErrorKind::UnexpectedEof + ); + } + + #[tokio::test] + async fn invalid_padding() { + let reader = tokio_test::io::Builder::new() + .read(&[0xed]) + .wait(Duration::ZERO) + .read(&[0xef, 0x01, 0x00]) + .wait(Duration::ZERO) + .build(); + + assert_eq!( + read_trailer::<_, Pad>(reader, 2).await.unwrap_err().kind(), + io::ErrorKind::InvalidData + ); + } + + #[tokio::test] + async fn success() { + let reader = tokio_test::io::Builder::new() + .read(&[0xed]) + .wait(Duration::ZERO) + .read(&[0xef, 0x00]) + .wait(Duration::ZERO) + .read(&[0x00, 0x00, 0x00, 0x00, 0x00]) + .build(); + + assert_eq!( + &*read_trailer::<_, Pad>(reader, 2).await.unwrap(), + &[0xed, 0xef] + ); + } + + #[tokio::test] + async fn no_padding() { + assert!(read_trailer::<_, Pad>(io::empty(), 0) + .await + .unwrap() + .is_empty()); + } +} diff --git a/tvix/nix-compat/src/wire/bytes/writer.rs b/tvix/nix-compat/src/wire/bytes/writer.rs new file mode 100644 index 0000000000..f5632771e9 --- /dev/null +++ b/tvix/nix-compat/src/wire/bytes/writer.rs @@ -0,0 +1,538 @@ +use pin_project_lite::pin_project; +use std::task::{ready, Poll}; + +use tokio::io::AsyncWrite; + +use super::{padding_len, EMPTY_BYTES, LEN_SIZE}; + +pin_project! { + /// Writes a "bytes wire packet" to the underlying writer. + /// The format is the same as in [crate::wire::bytes::write_bytes], + /// however this structure provides a [AsyncWrite] interface, + /// allowing to not having to pass around the entire payload in memory. 
+ /// + /// It internally takes care of writing (non-payload) framing (size and + /// padding). + /// + /// During construction, the expected payload size needs to be provided. + /// + /// After writing the payload to it, the user MUST call flush (or shutdown), + /// which will validate the written payload size to match, and write the + /// necessary padding. + /// + /// In case flush is not called at the end, invalid data might be sent + /// silently. + /// + /// The underlying writer returning `Ok(0)` is considered an EOF situation, + /// which is stronger than the "typically means the underlying object is no + /// longer able to accept bytes" interpretation from the docs. If such a + /// situation occurs, an error is returned. + /// + /// The struct holds three fields, the underlying writer, the (expected) + /// payload length, and an enum, tracking the state. + pub struct BytesWriter<W> + where + W: AsyncWrite, + { + #[pin] + inner: W, + payload_len: u64, + state: BytesPacketPosition, + } +} + +/// Models the position inside a "bytes wire packet" that the writer is in. +/// It can be in three different stages, inside size, payload or padding fields. +/// The number tracks the number of bytes written inside the specific field. +/// There shall be no ambiguous states, at the end of a stage we immediately +/// move to the beginning of the next one: +/// - Size(LEN_SIZE) must be expressed as Payload(0) +/// - Payload(self.payload_len) must be expressed as Padding(0) +/// +/// Padding(padding_len) means we're at the end of the bytes wire packet. +#[derive(Clone, Debug, PartialEq, Eq)] +enum BytesPacketPosition { + Size(usize), + Payload(u64), + Padding(usize), +} + +impl<W> BytesWriter<W> +where + W: AsyncWrite, +{ + /// Constructs a new BytesWriter, using the underlying passed writer. + pub fn new(w: W, payload_len: u64) -> Self { + Self { + inner: w, + payload_len, + state: BytesPacketPosition::Size(0), + } + } +} + +/// Returns an error if the passed usize is 0. 
///
/// A count of zero is turned into a [std::io::ErrorKind::WriteZero] error,
/// any other count is handed back unchanged.
#[inline]
fn ensure_nonzero_bytes_written(bytes_written: usize) -> Result<usize, std::io::Error> {
    match bytes_written {
        0 => Err(std::io::Error::new(
            std::io::ErrorKind::WriteZero,
            "underlying writer accepted 0 bytes",
        )),
        accepted => Ok(accepted),
    }
}
+ BytesPacketPosition::Padding(_pos) => { + return Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "tried to write excess bytes", + ))) + } + } + } + } + + fn poll_flush( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll<Result<(), std::io::Error>> { + let mut this = self.project(); + + loop { + match *this.state { + BytesPacketPosition::Size(LEN_SIZE) => unreachable!(), + BytesPacketPosition::Size(pos) => { + // More bytes to write in the size field + let size_field = &this.payload_len.to_le_bytes()[..]; + let bytes_written = ensure_nonzero_bytes_written(ready!(this + .inner + .as_mut() + .poll_write(cx, &size_field[pos..]))?)?; + let new_pos = pos + bytes_written; + if new_pos == LEN_SIZE { + // Size field written, now ready to receive payload + *this.state = BytesPacketPosition::Payload(0); + } else { + *this.state = BytesPacketPosition::Size(new_pos); + } + } + BytesPacketPosition::Payload(_pos) => { + // If we're at position 0 and want to write 0 bytes of payload + // in total, we can transition to padding. + // Otherwise, break, as we're expecting more payload to + // be written. + if *this.payload_len == 0 { + *this.state = BytesPacketPosition::Padding(0); + } else { + break; + } + } + BytesPacketPosition::Padding(pos) => { + // Write remaining padding, if there is padding to write. + let total_padding_len = padding_len(*this.payload_len) as usize; + + if pos != total_padding_len { + let bytes_written = ensure_nonzero_bytes_written(ready!(this + .inner + .as_mut() + .poll_write(cx, &EMPTY_BYTES[pos..total_padding_len]))?)?; + *this.state = BytesPacketPosition::Padding(pos + bytes_written); + } else { + // everything written, break + break; + } + } + } + } + // Flush the underlying writer. + this.inner.as_mut().poll_flush(cx) + } + + fn poll_shutdown( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll<Result<(), std::io::Error>> { + // Call flush. 
+ ready!(self.as_mut().poll_flush(cx))?; + + let this = self.project(); + + // After a flush, being inside the padding state, and at the end of the padding + // is the only way to prevent a dirty shutdown. + if let BytesPacketPosition::Padding(pos) = *this.state { + let padding_len = padding_len(*this.payload_len) as usize; + if padding_len == pos { + // Shutdown the underlying writer + return this.inner.poll_shutdown(cx); + } + } + + // Shutdown the underlying writer, bubbling up any errors. + ready!(this.inner.poll_shutdown(cx))?; + + // return an error about unclean shutdown + Poll::Ready(Err(std::io::Error::new( + std::io::ErrorKind::BrokenPipe, + "unclean shutdown", + ))) + } +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use crate::wire::bytes::write_bytes; + use hex_literal::hex; + use lazy_static::lazy_static; + use tokio::io::AsyncWriteExt; + use tokio_test::{assert_err, assert_ok, io::Builder}; + + use super::*; + + lazy_static! { + pub static ref LARGE_PAYLOAD: Vec<u8> = (0..255).collect::<Vec<u8>>().repeat(4 * 1024); + } + + /// Helper function, calling the (simpler) write_bytes with the payload. + /// We use this to create data we want to see on the wire. + async fn produce_exp_bytes(payload: &[u8]) -> Vec<u8> { + let mut exp = vec![]; + write_bytes(&mut exp, payload).await.unwrap(); + exp + } + + /// Write an empty bytes packet. + #[tokio::test] + async fn write_empty() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.write_all(&[]).await, "write all data"); + assert_ok!(w.flush().await, "flush"); + } + + /// Write an empty bytes packet, not calling write. 
+ #[tokio::test] + async fn write_empty_only_flush() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.flush().await, "flush"); + } + + /// Write an empty bytes packet, not calling write or flush, only shutdown. + #[tokio::test] + async fn write_empty_only_shutdown() { + let payload = &[]; + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, 0); + assert_ok!(w.shutdown().await, "shutdown"); + } + + /// Write a 1 bytes packet + #[tokio::test] + async fn write_1b() { + let payload = &[0xff]; + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 8 bytes payload (no padding) + #[tokio::test] + async fn write_8b() { + let payload = &hex!("0001020304050607"); + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 9 bytes payload (7 bytes padding) + #[tokio::test] + async fn write_9b() { + let payload = &hex!("000102030405060708"); + + let mut mock = Builder::new() + .write(&produce_exp_bytes(payload).await) + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, "flush"); + } + + /// Write a 9 bytes packet very granularly, with a lot of flushing in between, + /// and a shutdown at the end. 
+ #[tokio::test] + async fn write_9b_flush() { + let payload = &hex!("000102030405060708"); + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.flush().await); + + assert_ok!(w.write_all(&payload[..4]).await); + assert_ok!(w.flush().await); + + // empty write, cause why not + assert_ok!(w.write_all(&[]).await); + assert_ok!(w.flush().await); + + assert_ok!(w.write_all(&payload[4..]).await); + assert_ok!(w.flush().await); + assert_ok!(w.shutdown().await); + } + + /// Write a 9 bytes packet, but cause the sink to only accept half of the + /// padding, ensuring we correctly write (only) the rest of the padding later. + /// We write another 2 bytes of "bait", where a faulty implementation (pre + /// cl/11384) would put too many null bytes. + #[tokio::test] + async fn write_9b_write_padding_2steps() { + let payload = &hex!("000102030405060708"); + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new() + .write(&exp_bytes[0..8]) // size + .write(&exp_bytes[8..17]) // payload + .write(&exp_bytes[17..19]) // padding (2 of 7 bytes) + // insert a wait to prevent Mock from merging the two writes into one + .wait(Duration::from_nanos(1)) + .write(&hex!("0000000000ffff")) // padding (5 of 7 bytes, plus 2 bytes of "bait") + .build(); + + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + assert_ok!(w.write_all(&payload[..]).await); + assert_ok!(w.flush().await); + // Write bait + assert_ok!(mock.write_all(&hex!("ffff")).await); + } + + /// Write a larger bytes packet + #[tokio::test] + async fn write_1m() { + let payload = LARGE_PAYLOAD.as_slice(); + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write_all(payload).await); + assert_ok!(w.flush().await, 
"flush"); + } + + /// Not calling flush at the end, but shutdown is also ok if we wrote all + /// bytes we promised to write (as shutdown implies flush) + #[tokio::test] + async fn write_shutdown_without_flush_end() { + let payload = &[0xf0, 0xff]; + let exp_bytes = produce_exp_bytes(payload).await; + + let mut mock = Builder::new().write(&exp_bytes).build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + // call flush to write the size field + assert_ok!(w.flush().await); + + // write payload + assert_ok!(w.write_all(payload).await); + + // call shutdown + assert_ok!(w.shutdown().await); + } + + /// Writing more bytes than previously signalled should fail. + #[tokio::test] + async fn write_more_than_signalled_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + assert_err!(w.write_all(&hex!("000102")).await); + } + /// Writing more bytes than previously signalled, but in two parts + #[tokio::test] + async fn write_more_than_signalled_split_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + // write two bytes + assert_ok!(w.write_all(&hex!("0001")).await); + + // write the excess byte. + assert_err!(w.write_all(&hex!("02")).await); + } + + /// Writing more bytes than previously signalled, but flushing after the + /// signalled amount should fail. + #[tokio::test] + async fn write_more_than_signalled_flush_fail() { + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, 2); + + // write two bytes, then flush + assert_ok!(w.write_all(&hex!("0001")).await); + assert_ok!(w.flush().await); + + // write the excess byte. + assert_err!(w.write_all(&hex!("02")).await); + } + + /// Calling shutdown while not having written all bytes that were promised + /// returns an error. + /// Note there's still cases of silent corruption if the user doesn't call + /// shutdown explicitly (only drops). 
+ #[tokio::test] + async fn premature_shutdown() { + let payload = &[0xf0, 0xff]; + let mut buf = Vec::new(); + let mut w = BytesWriter::new(&mut buf, payload.len() as u64); + + // call flush to write the size field + assert_ok!(w.flush().await); + + // write half of the payload (!) + assert_ok!(w.write_all(&payload[0..1]).await); + + // call shutdown, ensure it fails + assert_err!(w.shutdown().await); + } + + /// Write to a Writer that fails to write during the size packet (after 4 bytes). + /// Ensure this error gets propagated on the first call to write. + #[tokio::test] + async fn inner_writer_fail_during_size_firstwrite() { + let payload = &[0xf0]; + + let mut mock = Builder::new() + .write(&1u32.to_le_bytes()) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_err!(w.write_all(payload).await); + } + + /// Write to a Writer that fails to write during the size packet (after 4 bytes). + /// Ensure this error gets propagated during an initial flush + #[tokio::test] + async fn inner_writer_fail_during_size_initial_flush() { + let payload = &[0xf0]; + + let mut mock = Builder::new() + .write(&1u32.to_le_bytes()) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_err!(w.flush().await); + } + + /// Write to a writer that fails to write during the payload (after 9 bytes). + /// Ensure this error gets propagated when we're writing this byte. 
+ #[tokio::test] + async fn inner_writer_fail_during_write() { + let payload = &hex!("f0ff"); + + let mut mock = Builder::new() + .write(&2u64.to_le_bytes()) + .write(&hex!("f0")) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write(&hex!("f0")).await); + assert_err!(w.write(&hex!("ff")).await); + } + + /// Write to a writer that fails to write during the padding (after 10 bytes). + /// Ensure this error gets propagated during a flush. + #[tokio::test] + async fn inner_writer_fail_during_padding_flush() { + let payload = &hex!("f0"); + + let mut mock = Builder::new() + .write(&1u64.to_le_bytes()) + .write(&hex!("f0")) + .write(&hex!("00")) + .write_error(std::io::Error::new(std::io::ErrorKind::Other, "🍿")) + .build(); + let mut w = BytesWriter::new(&mut mock, payload.len() as u64); + + assert_ok!(w.write(&hex!("f0")).await); + assert_err!(w.flush().await); + } +} diff --git a/tvix/nix-compat/src/wire/mod.rs b/tvix/nix-compat/src/wire/mod.rs new file mode 100644 index 0000000000..a197e3a1f4 --- /dev/null +++ b/tvix/nix-compat/src/wire/mod.rs @@ -0,0 +1,5 @@ +//! Module parsing and emitting the wire format used by Nix, both in the +//! nix-daemon protocol as well as in the NAR format. + +mod bytes; +pub use bytes::*; diff --git a/tvix/nix-compat/testdata/narinfo.zst b/tvix/nix-compat/testdata/narinfo.zst new file mode 100644 index 0000000000..361a422da8 --- /dev/null +++ b/tvix/nix-compat/testdata/narinfo.zst Binary files differ |