Diffstat (limited to 'tvix/nix-compat')
62 files changed, 6442 insertions, 0 deletions
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml new file mode 100644 index 000000000000..2e4bff71aa42 --- /dev/null +++ b/tvix/nix-compat/Cargo.toml @@ -0,0 +1,47 @@ +[package] +name = "nix-compat" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[features] +async = ["futures-util"] + +[dependencies] +bitflags = "2.4.1" +bstr = { version = "1.6.0", features = ["alloc", "unicode", "serde"] } +data-encoding = "2.3.3" +ed25519 = "2.2.3" +ed25519-dalek = "2.1.0" +futures-util = { version = "0.3.30", features = ["io"], optional = true } +glob = "0.3.0" +nom = "7.1.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10.6" +thiserror = "1.0.38" + +[dev-dependencies] +futures = { version = "0.3.30", default-features = false, features = ["executor"] } +lazy_static = "1.4.0" +serde_json = "1.0" +test-case = "3.3.1" +criterion = { version = "0.5", features = ["html_reports"] } +hex-literal = "0.4.1" +pretty_assertions = "1.4.0" +zstd = "^0.9.0" + +[dev-dependencies.test-generator] +# This fork of test-generator adds support for cargo workspaces, see +# also https://github.com/frehberg/test-generator/pull/14 +git = "https://github.com/JamesGuthrie/test-generator.git" +rev = "82e799979980962aec1aa324ec6e0e4cad781f41" + +[[bench]] +name = "derivation_parse_aterm" +harness = false + +[[bench]] +name = "narinfo_parse" +harness = false diff --git a/tvix/nix-compat/benches/derivation_parse_aterm.rs b/tvix/nix-compat/benches/derivation_parse_aterm.rs new file mode 100644 index 000000000000..4ace7d4480f4 --- /dev/null +++ b/tvix/nix-compat/benches/derivation_parse_aterm.rs @@ -0,0 +1,31 @@ +use std::path::Path; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use nix_compat::derivation::Derivation; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests/ok"; + +fn bench_aterm_parser(c: &mut Criterion) { + for drv in [ + "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", + "292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv", + "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", + "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv", + "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv", + "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv", + "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv", + "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv", + "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", + "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv", + ] { + let drv_path = Path::new(RESOURCES_PATHS).join(drv); + let drv_bytes = &std::fs::read(drv_path).unwrap(); + + c.bench_function(drv, |b| { + b.iter(|| Derivation::from_aterm_bytes(black_box(drv_bytes))) + }); + } +} + +criterion_group!(benches, bench_aterm_parser); +criterion_main!(benches); diff --git a/tvix/nix-compat/benches/narinfo_parse.rs b/tvix/nix-compat/benches/narinfo_parse.rs new file mode 100644 index 000000000000..7ffd24d12bc3 --- /dev/null +++ b/tvix/nix-compat/benches/narinfo_parse.rs @@ -0,0 +1,69 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput}; +use lazy_static::lazy_static; +use nix_compat::narinfo::NarInfo; +use std::{io, str}; + +const SAMPLE: &str = r#"StorePath: /nix/store/1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 +URL: nar/0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs.nar.xz +Compression: xz +FileHash: sha256:0rdn027rxqbl42bv9jxhsipgq2hwqdapvwmdzligmzdmz2p9vybs +FileSize: 92828 +NarHash: 
sha256:0cfnydzp132y69bh20dj76yfd6hc3qdyblbwr9hwn59vfmnb09m0 +NarSize: 173352 +References: 03d4ncyfh76mgs6sfayl8l6zzdhm219w-python3.9-mt-940-4.23.0 0rhbw783qcjxv3cqln1760i1lmz2yb67-gsettings-desktop-schemas-41.0 1dm9ndgg56ylawpcbdzkhl03fg6777rr-python3.9-six-1.16.0 1pajsq519irjy86vli20bgq1wr1q3pny-banking-0.3.0 2ccy5zc89zpc2aznqxgvzp4wm1bwj05n-bzip2-1.0.6.0.2-bin 32gy3pqk4n725lscdm622yzsg9np3xvs-python3.9-cryptography-36.0.0-dev 35chvqbr7vp9icdki0132fc6np09vrx5-python3.9-bleach-4.1.0 53abh5cz9zi4yh75lfzg99xqy0fdgj4i-python3.9-xmlschema-1.9.2 5p96sifyavb407mnharhyzlw6pn6km1b-glib-2.70.2-bin 6hil8z0zkqcgvaw1qwjyqa8qyaa1lm3k-python3.9-pycairo-1.20.1 803ffb21rv4af521pplb72zjm1ygm9kk-python3.9-pyparsing-2.4.7 al95l8psvmq5di3vdwa75n8w2m0sj2sy-gdk-pixbuf-2.42.6 b09371lq1jjrv43h8jpp82v23igndsn2-python3.9-fints-3.0.1 b53hk557pdk5mq4lv1zrh71a54qazbsm-python3.9-certifi-2021.10.08 bl0cwvwgch92cfsnli4dsah2gxgdickp-gtk+3-3.24.30 cfkq9wi7ypqk26c75dzic5v3nxlzyi58-python3.9-cryptography-36.0.0 cyhg57whqvrx7xf7fvn70dr5836y7zak-python3.9-sepaxml-2.4.1 d810g729g1c4lvp3nv1n3ah6cvpwg7by-cairo-1.16.0-dev dn4fwp0yx6nsa85cr20cwvdmg64xwmcy-python3-3.9.9 dzsj2n0nmq8nv6w0hvy5vb61kim3rzmd-pango-1.50.0 fs6rcnhbjvpxsyw5qiq0q7jx378fjrq7-python3.9-webencodings-0.5.1 g08sxarx191yh2dh0yk2j8icja54aksf-harfbuzz-3.1.2 glanz2lv7m6ak8pql0jcpr3izyp5cxm5-python3.9-pycparser-2.21 gpzx6h0dp5yhcvkfj68zs444ghll7dzm-python3.9-html5lib-1.1 gxyhqkpahahn4h8wbanzfhr1zkxbysid-expat-2.4.2-dev gy3pnc7bpff1h4ylhrivs4cjlvmxl0dk-python3.9-packaging-20.9 hhpqldw0552mf4mjdm2q7zqwy9hpfchd-libpng-apng-1.6.37-dev ig2bdwmplvs6dyg07fdyh006ha768jh1-python3.9-cffi-1.15.0 ij5rm5y6lmqzrwqd1zxckhbii3dg2nq5-glib-2.70.2-dev j5raylzz6fsafbgayyfaydadjl0x22s0-freetype-2.11.1-dev j6w2fbsl49jska4scyr860gz4df9biha-gobject-introspection-1.70.0 jfc99f1hrca6ih6h0n4ax431hjlx96j0-python3.9-brotli-1.0.9 kbazcxnki2qz514rl1plhsj3587hl8bb-python3.9-pysocks-1.7.1 kkljrrrj80fnz59qyfgnv6wvv0cbmpql-libhandy-1.5.0 l82il2lbp757c0smi81qmj4crlcmdz9s-python3.9-pygobject-3.42.0-dev m4zflhr10wz4frhgxqfi43rwvapki1pi-fontconfig-2.13.94-bin mbsc1c7mq15vgfzcdma9fglczih9ncfy-python3.9-chardet-4.0.0 mfvaaf4illpwrflg30cij5x4rncp9jin-python3.9-text-unidecode-1.3 msiv2nkdcaf4gvaf2cfnxcjm66j8mjxz-python3.9-elementpath-2.4.0 nmwapds8fcx22vd30d81va7a7a51ywwx-gettext-0.21 pbfraw351mksnkp2ni9c4rkc9cpp89iv-bash-5.1-p12 r8cbf18vrd54rb4psf3m4zlk5sd2jsv3-python3.9-pygobject-3.42.0 rig6npd9sd45ashf6fxcwgxzm7m4p0l3-python3.9-requests-2.26.0 ryj72ashr27gf4kh0ssgi3zpiv8fxw53-librsvg-2.52.4 s2jjq7rk5yrzlv9lyralzvpixg4p6jh3-atk-2.36.0 w1lsr2i37fr0mp1jya04nwa5nf5dxm2n-python3.9-setuptools-57.2.0 whfykra99ahs814l5hp3q5ps8rwzsf3s-python3.9-brotlicffi-1.0.9.2 wqdmghdvc4s95jgpp13fj5v3xar8mlks-python3.9-charset-normalizer-2.0.8 x1ha2nyji1px0iqknbyhdnvw4icw5h3i-python3.9-idna-3.3 z9560qb4ygbi0352m9pglwhi332cxb1f-python3.9-urllib3-1.26.7 +Deriver: 2ch8jx910qk6721mp4yqsmvdfgj5c8ir-banking-0.3.0.drv +Sig: cache.nixos.org-1:xcL67rBZPcdVZudDLpLeddkBa0KaFTw5A0udnaa0axysjrQ6Nvd9p3BLZ4rhKgl52/cKiU3c6aq60L8+IcE5Dw== +"#; + +lazy_static! 
{
+    static ref CASES: &'static [&'static str] = {
+        let data =
+            zstd::decode_all(io::Cursor::new(include_bytes!("../testdata/narinfo.zst"))).unwrap();
+        let data = str::from_utf8(Vec::leak(data)).unwrap();
+        Vec::leak(
+            data.split_inclusive("\n\n")
+                .map(|s| s.strip_suffix('\n').unwrap())
+                .collect::<Vec<_>>(),
+        )
+    };
+}
+
+pub fn parse(c: &mut Criterion) {
+    let mut g = c.benchmark_group("parse");
+
+    {
+        g.throughput(Throughput::Bytes(SAMPLE.len() as u64));
+        g.bench_with_input("single", SAMPLE, |b, data| {
+            b.iter(|| {
+                black_box(NarInfo::parse(black_box(data)).ok().unwrap());
+            });
+        });
+    }
+
+    {
+        for &case in *CASES {
+            NarInfo::parse(case).expect("should parse");
+        }
+
+        g.throughput(Throughput::Bytes(
+            CASES.iter().map(|s| s.len() as u64).sum(),
+        ));
+        g.bench_with_input("many", &*CASES, |b, data| {
+            let mut vec = vec![];
+            b.iter(|| {
+                vec.clear();
+                vec.extend(
+                    black_box(data)
+                        .iter()
+                        .map(|s| NarInfo::parse(s).ok().unwrap()),
+                );
+                black_box(&vec);
+            });
+        });
+    }
+
+    g.finish();
+}
+
+criterion_group!(benches, parse);
+criterion_main!(benches);
diff --git a/tvix/nix-compat/default.nix b/tvix/nix-compat/default.nix
new file mode 100644
index 000000000000..d6169f1339cf
--- /dev/null
+++ b/tvix/nix-compat/default.nix
@@ -0,0 +1,7 @@
+{ depot, ... }:
+
+depot.tvix.crates.workspaceMembers.nix-compat.build.override {
+  runTests = true;
+  # make sure we also enable async here, so we also run the tests behind that feature flag.
+  features = [ "default" "async" ];
+}
diff --git a/tvix/nix-compat/src/aterm/escape.rs b/tvix/nix-compat/src/aterm/escape.rs
new file mode 100644
index 000000000000..06b550bbf02d
--- /dev/null
+++ b/tvix/nix-compat/src/aterm/escape.rs
@@ -0,0 +1,27 @@
+use bstr::ByteSlice;
+
+/// Escapes a byte sequence. Does not add surrounding quotes.
+pub fn escape_bytes<P: AsRef<[u8]>>(s: P) -> Vec<u8> {
+    let mut s: Vec<u8> = s.as_ref().to_vec();
+
+    s = s.replace(b"\\", b"\\\\");
+    s = s.replace(b"\n", b"\\n");
+    s = s.replace(b"\r", b"\\r");
+    s = s.replace(b"\t", b"\\t");
+    s = s.replace(b"\"", b"\\\"");
+
+    s
+}
+
+#[cfg(test)]
+mod tests {
+    use super::escape_bytes;
+    use test_case::test_case;
+
+    #[test_case(b"", b""; "empty")]
+    #[test_case(b"\"", b"\\\""; "doublequote")]
+    #[test_case(b":", b":"; "colon")]
+    fn escape(input: &[u8], expected: &[u8]) {
+        assert_eq!(expected, escape_bytes(input))
+    }
+}
diff --git a/tvix/nix-compat/src/aterm/mod.rs b/tvix/nix-compat/src/aterm/mod.rs
new file mode 100644
index 000000000000..8806b6caf2e5
--- /dev/null
+++ b/tvix/nix-compat/src/aterm/mod.rs
@@ -0,0 +1,7 @@
+mod escape;
+mod parser;
+
+pub(crate) use escape::escape_bytes;
+pub(crate) use parser::parse_bstr_field;
+pub(crate) use parser::parse_str_list;
+pub(crate) use parser::parse_string_field;
diff --git a/tvix/nix-compat/src/aterm/parser.rs b/tvix/nix-compat/src/aterm/parser.rs
new file mode 100644
index 000000000000..72648d5ef590
--- /dev/null
+++ b/tvix/nix-compat/src/aterm/parser.rs
@@ -0,0 +1,118 @@
+//! This module implements parsing code for some basic building blocks
+//! of the [ATerm][] format, which is used by C++ Nix to serialize Derivations.
+//!
+//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
+use bstr::BString;
+use nom::branch::alt;
+use nom::bytes::complete::{escaped_transform, is_not, tag};
+use nom::character::complete::char as nomchar;
+use nom::combinator::{map, value};
+use nom::multi::separated_list0;
+use nom::sequence::delimited;
+use nom::IResult;
+
+/// Parse a bstr and undo any escaping.
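+///
+/// An illustrative sketch (mirroring the escape pairs handled by
+/// `escape_bytes` above): the escaped input `Hello\nWorld` decodes to a
+/// literal newline in between.
+///
+/// ```ignore
+/// let (rest, unescaped) = parse_escaped_bstr(br"Hello\nWorld").unwrap();
+/// assert_eq!(unescaped, "Hello\nWorld");
+/// assert!(rest.is_empty());
+/// ```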
+fn parse_escaped_bstr(i: &[u8]) -> IResult<&[u8], BString> {
+    escaped_transform(
+        is_not("\"\\"),
+        '\\',
+        alt((
+            value("\\".as_bytes(), nomchar('\\')),
+            value("\n".as_bytes(), nomchar('n')),
+            value("\t".as_bytes(), nomchar('t')),
+            value("\r".as_bytes(), nomchar('r')),
+            value("\"".as_bytes(), nomchar('\"')),
+        )),
+    )(i)
+    .map(|(i, v)| (i, BString::new(v)))
+}
+
+/// Parse a field in double quotes, undo any escaping, and return the unquoted
+/// and decoded `Vec<u8>`.
+pub(crate) fn parse_bstr_field(i: &[u8]) -> IResult<&[u8], BString> {
+    // inside double quotes…
+    delimited(
+        nomchar('\"'),
+        // There is
+        alt((
+            // …either a bstr after unescaping
+            parse_escaped_bstr,
+            // …or an empty string.
+            map(tag(b""), |_| BString::default()),
+        )),
+        nomchar('\"'),
+    )(i)
+}
+
+/// Parse a field in double quotes, undo any escaping, and return the unquoted
+/// and decoded string, if it's a valid string. Or fail parsing if the bytes are
+/// not valid UTF-8.
+pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> {
+    // inside double quotes…
+    delimited(
+        nomchar('\"'),
+        // There is
+        alt((
+            // …either a String after unescaping
+            nom::combinator::map_opt(parse_escaped_bstr, |escaped_bstr| {
+                String::from_utf8(escaped_bstr.into()).ok()
+            }),
+            // …or an empty string.
+            map(tag(b""), |_| String::new()),
+        )),
+        nomchar('\"'),
+    )(i)
+}
+
+/// Parse a list of string fields (enclosed in brackets)
+pub(crate) fn parse_str_list(i: &[u8]) -> IResult<&[u8], Vec<String>> {
+    // inside brackets
+    delimited(
+        nomchar('['),
+        separated_list0(nomchar(','), parse_string_field),
+        nomchar(']'),
+    )(i)
+}
+
+#[cfg(test)]
+mod tests {
+    use test_case::test_case;
+
+    #[test_case(br#""""#, b"", b""; "empty")]
+    #[test_case(br#""Hello World""#, b"Hello World", b""; "hello world")]
+    #[test_case(br#""\"""#, br#"""#, b""; "doublequote")]
+    #[test_case(br#"":""#, b":", b""; "colon")]
+    #[test_case(br#""\""Rest"#, br#"""#, b"Rest"; "doublequote rest")]
+    fn parse_bstr_field(input: &[u8], expected: &[u8], exp_rest: &[u8]) {
+        let (rest, parsed) = super::parse_bstr_field(input).expect("must parse");
+        assert_eq!(exp_rest, rest, "expected remainder");
+        assert_eq!(expected, parsed);
+    }
+
+    #[test_case(br#""""#, "", b""; "empty")]
+    #[test_case(br#""Hello World""#, "Hello World", b""; "hello world")]
+    #[test_case(br#""\"""#, r#"""#, b""; "doublequote")]
+    #[test_case(br#"":""#, ":", b""; "colon")]
+    #[test_case(br#""\""Rest"#, r#"""#, b"Rest"; "doublequote rest")]
+    fn parse_string_field(input: &[u8], expected: &str, exp_rest: &[u8]) {
+        let (rest, parsed) = super::parse_string_field(input).expect("must parse");
+        assert_eq!(exp_rest, rest, "expected remainder");
+        assert_eq!(expected, &parsed);
+    }
+
+    #[test]
+    fn parse_string_field_invalid_encoding_fail() {
+        let input: Vec<_> = vec![b'"', 0xc5, 0xc4, 0xd6, b'"'];
+
+        super::parse_string_field(&input).expect_err("must fail");
+    }
+
+    #[test_case(br#"["foo"]"#, vec!["foo".to_string()], b""; "single foo")]
+    #[test_case(b"[]", vec![], b""; "empty list")]
+    #[test_case(b"[]blub", vec![], b"blub"; "empty list with rest")]
+    fn parse_list(input: &[u8], expected: Vec<String>, exp_rest: &[u8]) {
+        let (rest, parsed) = super::parse_str_list(input).expect("must parse");
+        assert_eq!(exp_rest, rest, "expected remainder");
+        assert_eq!(expected, parsed);
+    }
+}
diff --git a/tvix/nix-compat/src/bin/drvfmt.rs b/tvix/nix-compat/src/bin/drvfmt.rs
new file mode 100644
index 000000000000..ddc1f0389f26
--- /dev/null
+++ b/tvix/nix-compat/src/bin/drvfmt.rs
@@ -0,0 +1,42 @@
+use std::{collections::BTreeMap, io::Read};
+
+use nix_compat::derivation::Derivation;
+use serde_json::json;
+
+/// construct a serde_json::Value from a Derivation.
+/// Some environment values can be invalid UTF-8 strings.
+/// `serde_json` prints them out in a rather unreadable way.
+/// This is a tool to print A-Terms in a more readable fashion, so we brutally
+/// use the [std::string::ToString] implementation of [bstr::BString] to get
+/// a UTF-8 string (replacing invalid characters with the Unicode replacement
+/// codepoint).
+fn build_serde_json_value(drv: Derivation) -> serde_json::Value {
+    json!({
+        "args": drv.arguments,
+        "builder": drv.builder,
+        "env": drv.environment.into_iter().map(|(k,v)| (k, v.to_string())).collect::<BTreeMap<String, String>>(),
+        "inputDrvs": drv.input_derivations,
+        "inputSrcs": drv.input_sources,
+        "outputs": drv.outputs,
+        "system": drv.system,
+    })
+}
+
+fn main() {
+    // read A-Term from stdin
+    let mut buf = Vec::new();
+    std::io::stdin()
+        .read_to_end(&mut buf)
+        .expect("failed to read from stdin");
+
+    match Derivation::from_aterm_bytes(&buf) {
+        Ok(drv) => {
+            println!(
+                "{}",
+                serde_json::to_string_pretty(&build_serde_json_value(drv))
+                    .expect("unable to serialize")
+            );
+        }
+        Err(e) => eprintln!("unable to parse derivation: {:#?}", e),
+    }
+}
diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs
new file mode 100644
index 000000000000..7dcc3a5d0042
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/errors.rs
@@ -0,0 +1,58 @@
+//! Contains [DerivationError], exported as [crate::derivation::DerivationError]
+use crate::store_path;
+use thiserror::Error;
+
+use super::CAHash;
+
+/// Errors that can occur during the validation of Derivation structs.
+#[derive(Debug, Error, PartialEq)]
+pub enum DerivationError {
+    // outputs
+    #[error("no outputs defined")]
+    NoOutputs(),
+    #[error("invalid output name: {0}")]
+    InvalidOutputName(String),
+    #[error("encountered fixed-output derivation, but more than 1 output in total")]
+    MoreThanOneOutputButFixed(),
+    #[error("invalid output name for fixed-output derivation: {0}")]
+    InvalidOutputNameForFixed(String),
+    #[error("unable to validate output {0}: {1}")]
+    InvalidOutput(String, OutputError),
+    #[error("unable to validate output {0}: {1}")]
+    InvalidOutputDerivationPath(String, store_path::BuildStorePathError),
+    // input derivation
+    #[error("unable to parse input derivation path {0}: {1}")]
+    InvalidInputDerivationPath(String, store_path::Error),
+    #[error("input derivation {0} doesn't end with .drv")]
+    InvalidInputDerivationPrefix(String),
+    #[error("input derivation {0} output names are empty")]
+    EmptyInputDerivationOutputNames(String),
+    #[error("input derivation {0} output name {1} is invalid")]
+    InvalidInputDerivationOutputName(String, String),
+
+    // input sources
+    #[error("unable to parse input sources path {0}: {1}")]
+    InvalidInputSourcesPath(String, store_path::Error),
+
+    // platform
+    #[error("invalid platform field: {0}")]
+    InvalidPlatform(String),
+
+    // builder
+    #[error("invalid builder field: {0}")]
+    InvalidBuilder(String),
+
+    // environment
+    #[error("invalid environment key {0}")]
+    InvalidEnvironmentKey(String),
+}
+
+/// Errors that can occur during the validation of a specific
+/// [crate::derivation::Output] of a [crate::derivation::Derivation].
+#[derive(Debug, Error, PartialEq)]
+pub enum OutputError {
+    #[error("Invalid output path {0}: {1}")]
+    InvalidOutputPath(String, store_path::Error),
+    #[error("Invalid CAHash: {:?}", .0)]
+    InvalidCAHash(CAHash),
+}
diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs
new file mode 100644
index 000000000000..2f90c54d8f76
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/mod.rs
@@ -0,0 +1,289 @@
+use crate::store_path::{
+    self, build_ca_path, build_output_path, build_text_path, StorePath, StorePathRef,
+};
+use bstr::BString;
+use serde::{Deserialize, Serialize};
+use sha2::{Digest, Sha256};
+use std::collections::{BTreeMap, BTreeSet};
+use std::io;
+
+mod errors;
+mod output;
+mod parse_error;
+mod parser;
+mod validate;
+mod write;
+
+#[cfg(test)]
+mod tests;
+
+// Public API of the crate.
+pub use crate::nixhash::{CAHash, NixHash};
+pub use errors::{DerivationError, OutputError};
+pub use output::Output;
+
+#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
+pub struct Derivation {
+    #[serde(rename = "args")]
+    pub arguments: Vec<String>,
+
+    pub builder: String,
+
+    #[serde(rename = "env")]
+    pub environment: BTreeMap<String, BString>,
+
+    #[serde(rename = "inputDrvs")]
+    pub input_derivations: BTreeMap<String, BTreeSet<String>>,
+
+    #[serde(rename = "inputSrcs")]
+    pub input_sources: BTreeSet<String>,
+
+    pub outputs: BTreeMap<String, Output>,
+
+    pub system: String,
+}
+
+impl Derivation {
+    /// write the Derivation to the given [std::io::Write], in ATerm format.
+    ///
+    /// The only errors returned are those from writing to the passed writer.
+    pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), io::Error> {
+        use write::*;
+
+        writer.write_all(write::DERIVATION_PREFIX.as_bytes())?;
+        write_char(writer, write::PAREN_OPEN)?;
+
+        write_outputs(writer, &self.outputs)?;
+        write_char(writer, COMMA)?;
+
+        write_input_derivations(writer, &self.input_derivations)?;
+        write_char(writer, COMMA)?;
+
+        write_input_sources(writer, &self.input_sources)?;
+        write_char(writer, COMMA)?;
+
+        write_system(writer, &self.system)?;
+        write_char(writer, COMMA)?;
+
+        write_builder(writer, &self.builder)?;
+        write_char(writer, COMMA)?;
+
+        write_arguments(writer, &self.arguments)?;
+        write_char(writer, COMMA)?;
+
+        write_environment(writer, &self.environment)?;
+
+        write_char(writer, PAREN_CLOSE)?;
+
+        Ok(())
+    }
+
+    /// return the ATerm serialization.
+    pub fn to_aterm_bytes(&self) -> Vec<u8> {
+        let mut buffer: Vec<u8> = Vec::new();
+
+        // invoke serialize and write to the buffer.
+        // Note we only propagate errors writing to the writer in serialize,
+        // which won't panic for the string we write to.
+        self.serialize(&mut buffer).unwrap();
+
+        buffer
+    }
+
+    /// Parse a Derivation in ATerm serialization, and validate it passes our
+    /// set of validations.
+    pub fn from_aterm_bytes(b: &[u8]) -> Result<Derivation, parser::Error<&[u8]>> {
+        parser::parse(b)
+    }
+
+    /// Returns the drv path of a [Derivation] struct.
+    ///
+    /// The drv path is calculated by invoking [build_text_path], using
+    /// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and
+    /// keys of [Derivation::input_derivations] as references, and the ATerm string of
+    /// the [Derivation] as content.
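+    ///
+    /// A usage sketch (`drv` is assumed to be an already-populated
+    /// [Derivation]; the path shown is the `foo` fixture from the tests
+    /// below):
+    ///
+    /// ```ignore
+    /// let drv_path = drv.calculate_derivation_path("foo").unwrap();
+    /// // e.g. /nix/store/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv
+    /// println!("{}", drv_path.to_absolute_path());
+    /// ```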
+    pub fn calculate_derivation_path(&self, name: &str) -> Result<StorePath, DerivationError> {
+        // append .drv to the name
+        let name = &format!("{}.drv", name);
+
+        // collect the list of paths from input_sources and input_derivations
+        // into a (sorted, guaranteed by BTreeSet) list of references
+        let references: BTreeSet<String> = {
+            let mut inputs = self.input_sources.clone();
+            let input_derivation_keys: Vec<String> =
+                self.input_derivations.keys().cloned().collect();
+            inputs.extend(input_derivation_keys);
+            inputs
+        };
+
+        build_text_path(name, self.to_aterm_bytes(), references)
+            .map(|s| s.to_owned())
+            .map_err(|_e| DerivationError::InvalidOutputName(name.to_string()))
+    }
+
+    /// Returns the FOD digest, if the derivation is fixed-output, or None if
+    /// it's not.
+    /// TODO: this is kinda the string from [build_ca_path] with a
+    /// [CAHash::Flat], what's fed to `build_store_path_from_fingerprint_parts`
+    /// (except the out_output.path being an empty string)
+    fn fod_digest(&self) -> Option<[u8; 32]> {
+        if self.outputs.len() != 1 {
+            return None;
+        }
+
+        let out_output = self.outputs.get("out")?;
+        let ca_hash = &out_output.ca_hash.as_ref()?;
+
+        Some(
+            Sha256::new_with_prefix(format!(
+                "fixed:out:{}{}:{}",
+                ca_kind_prefix(ca_hash),
+                ca_hash.digest().to_nix_hex_string(),
+                out_output.path
+            ))
+            .finalize()
+            .into(),
+        )
+    }
+
+    /// Calculates the hash of a derivation modulo fixed-output subderivations.
+    ///
+    /// This is called `hashDerivationModulo` in nixcpp.
+    ///
+    /// It returns a [NixHash], created by calculating the sha256 digest of
+    /// the derivation ATerm representation, except that:
+    ///  - any input derivation paths have been replaced "by the result of a
+    ///    recursive call to this function" and that
+    ///  - for fixed-output derivations the special
+    ///    `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
+    ///    the A-Term.
+    ///
+    /// If the derivation is not a fixed-output derivation, it's up to the
+    /// caller of this function to provide a lookup function to look up these
+    /// calculation results of parent derivations at
+    /// `fn_get_hash_derivation_modulo` (by drv path).
+    pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash
+    where
+        F: Fn(&StorePathRef) -> NixHash,
+    {
+        // Fixed-output derivations return a fixed hash.
+        // Non-Fixed-output derivations return a hash of the ATerm notation, but with all
+        // input_derivation paths replaced by a recursive call to this function.
+        // We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
+        let digest = self.fod_digest().unwrap_or({
+            // This is a new map from derivation_or_fod_hash.digest (as lowerhex)
+            // to list of output names
+            let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> =
+                BTreeMap::new();
+
+            // For each input_derivation, look up the
+            // derivation_or_fod_hash, and replace the derivation path with its
+            // HEXLOWER digest.
+            // (This is like [NixHash::to_nix_hash_string], but without the sha256: prefix.)
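+            // E.g. instead of the map entry
+            //   "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" -> {"out"}
+            // the replaced map would contain something like
+            //   "<64 lowercase hex chars of the replacement hash>" -> {"out"}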
+            for (drv_path_str, output_names) in &self.input_derivations {
+                // parse drv_path to StorePathRef
+                let drv_path = StorePathRef::from_absolute_path(drv_path_str.as_bytes())
+                    .expect("invalid input derivation path");
+                replaced_input_derivations.insert(
+                    data_encoding::HEXLOWER
+                        .encode(fn_get_derivation_or_fod_hash(&drv_path).digest_as_bytes()),
+                    output_names.clone(),
+                );
+            }
+
+            // construct a new derivation struct with these replaced input derivation strings
+            let replaced_derivation = Derivation {
+                input_derivations: replaced_input_derivations,
+                ..self.clone()
+            };
+
+            // write the ATerm of that to the hash function
+            let mut hasher = Sha256::new();
+            hasher.update(replaced_derivation.to_aterm_bytes());
+
+            hasher.finalize().into()
+        });
+
+        NixHash::Sha256(digest)
+    }
+
+    /// This calculates all output paths of a Derivation and updates the struct.
+    /// It requires the struct to be initially without output paths.
+    /// This means self.outputs[$outputName].path needs to be an empty string,
+    /// and self.environment[$outputName] needs to be an empty string.
+    ///
+    /// Output path calculation requires knowledge of the
+    /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output
+    /// derivations) also requires knowledge of other hash_derivation_modulo
+    /// [NixHash]es.
+    ///
+    /// We solve this by asking the caller of this function to provide the
+    /// hash_derivation_modulo of the current Derivation.
+    ///
+    /// On completion, self.environment[$outputName] and
+    /// self.outputs[$outputName].path are set to the calculated output path for all
+    /// outputs.
+    pub fn calculate_output_paths(
+        &mut self,
+        name: &str,
+        derivation_or_fod_hash: &NixHash,
+    ) -> Result<(), DerivationError> {
+        // The fingerprint and hash differ per output
+        for (output_name, output) in self.outputs.iter_mut() {
+            // Assert that outputs are not yet populated, to avoid using this function wrongly.
+            // We don't also check self.environment, but this is a sufficient
+            // footgun-prevention mechanism.
+            assert!(output.path.is_empty());
+
+            let path_name = output_path_name(name, output_name);
+
+            // For fixed-output derivations we use the per-output info, otherwise we use the
+            // derivation hash.
+            let abs_store_path = if let Some(ref hwm) = output.ca_hash {
+                build_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(|e| {
+                    DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e)
+                })?
+            } else {
+                build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| {
+                    DerivationError::InvalidOutputDerivationPath(
+                        output_name.to_string(),
+                        store_path::BuildStorePathError::InvalidStorePath(e),
+                    )
+                })?
+            };
+
+            output.path = abs_store_path.to_absolute_path();
+            self.environment.insert(
+                output_name.to_string(),
+                abs_store_path.to_absolute_path().into(),
+            );
+        }
+
+        Ok(())
+    }
+}
+
+/// Calculate the name part of the store path of a derivation [Output].
+///
+/// It's the derivation name, and (unless it's the `out` output) the output
+/// name after a `-`.
+fn output_path_name(derivation_name: &str, output_name: &str) -> String {
+    let mut output_path_name = derivation_name.to_string();
+    if output_name != "out" {
+        output_path_name.push('-');
+        output_path_name.push_str(output_name);
+    }
+    output_path_name
+}
+
+/// For a [CAHash], return the "prefix" used for NAR purposes.
+/// For [CAHash::Flat], this is an empty string, for [CAHash::Nar], it's "r:".
+/// Panics for other [CAHash] kinds, as they're not valid in a derivation
+/// context.
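+///
+/// A small sketch of the mapping:
+///
+/// ```ignore
+/// assert_eq!("", ca_kind_prefix(&CAHash::Flat(NixHash::Sha256([0; 32]))));
+/// assert_eq!("r:", ca_kind_prefix(&CAHash::Nar(NixHash::Sha256([0; 32]))));
+/// ```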
+fn ca_kind_prefix(ca_hash: &CAHash) -> &'static str {
+    match ca_hash {
+        CAHash::Flat(_) => "",
+        CAHash::Nar(_) => "r:",
+        _ => panic!("invalid ca hash in derivation context: {:?}", ca_hash),
+    }
+}
diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs
new file mode 100644
index 000000000000..b7fa1ac379b0
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/output.rs
@@ -0,0 +1,146 @@
+use crate::derivation::OutputError;
+use crate::nixhash::CAHash;
+use crate::store_path::StorePathRef;
+use serde::{Deserialize, Serialize};
+use serde_json::Map;
+
+#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
+pub struct Output {
+    pub path: String,
+
+    #[serde(flatten)]
+    pub ca_hash: Option<CAHash>, // we can only represent a subset here.
+}
+
+impl<'de> Deserialize<'de> for Output {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let fields = Map::deserialize(deserializer)?;
+        Ok(Self {
+            path: fields
+                .get("path")
+                .ok_or(serde::de::Error::missing_field(
+                    "`path` is missing but required for outputs",
+                ))?
+                .as_str()
+                .ok_or(serde::de::Error::invalid_type(
+                    serde::de::Unexpected::Other("certainly not a string"),
+                    &"a string",
+                ))?
+                .to_owned(),
+            ca_hash: CAHash::from_map::<D>(&fields)?,
+        })
+    }
+}
+
+impl Output {
+    pub fn is_fixed(&self) -> bool {
+        self.ca_hash.is_some()
+    }
+
+    pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> {
+        if let Some(fixed_output_hash) = &self.ca_hash {
+            match fixed_output_hash {
+                CAHash::Flat(_) | CAHash::Nar(_) => {
+                    // all hashes allowed for Flat, and Nar.
+                }
+                _ => return Err(OutputError::InvalidCAHash(fixed_output_hash.clone())),
+            }
+        }
+
+        if validate_output_paths {
+            if let Err(e) = StorePathRef::from_absolute_path(self.path.as_bytes()) {
+                return Err(OutputError::InvalidOutputPath(self.path.to_string(), e));
+            }
+        }
+        Ok(())
+    }
+}
+
+/// This ensures that a potentially valid input-addressed
+/// output is deserialized as a non-fixed output.
+#[test]
+fn deserialize_valid_input_addressed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablabla"
+    }"#;
+    let output: Output = serde_json::from_str(json_bytes).expect("must parse");
+
+    assert!(!output.is_fixed());
+}
+
+/// This ensures that a potentially valid fixed-output
+/// output deserializes fine as a fixed output.
+#[test]
+fn deserialize_valid_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+        "hashAlgo": "r:sha256"
+    }"#;
+    let output: Output = serde_json::from_str(json_bytes).expect("must parse");
+
+    assert!(output.is_fixed());
+}
+
+/// This ensures that parsing an input with an invalid hash encoding
+/// will result in a parsing failure.
+#[test]
+fn deserialize_with_error_invalid_hash_encoding_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "IAMNOTVALIDNIXBASE32",
+        "hashAlgo": "r:sha256"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the wrong hash algo
+/// will result in a parsing failure.
+#[test]
+fn deserialize_with_error_invalid_hash_algo_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+        "hashAlgo": "r:sha1024"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the missing hash algo but present hash will result in a
+/// parsing failure.
+#[test]
+fn deserialize_with_error_missing_hash_algo_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the missing hash but present hash algo will result in a
+/// parsing failure.
+#[test]
+fn deserialize_with_error_missing_hash_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hashAlgo": "r:sha1024"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(json_bytes);
+
+    assert!(output.is_err());
+}
diff --git a/tvix/nix-compat/src/derivation/parse_error.rs b/tvix/nix-compat/src/derivation/parse_error.rs
new file mode 100644
index 000000000000..26df13f5772b
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/parse_error.rs
@@ -0,0 +1,81 @@
+//! This contains the error and result types that can occur while parsing
+//! Derivations from ATerm.
+use nom::IResult;
+
+use crate::nixhash;
+
+pub type NomResult<I, O> = IResult<I, O, NomError<I>>;
+
+#[derive(Debug, thiserror::Error, PartialEq)]
+pub enum ErrorKind {
+    /// duplicate key in map
+    #[error("duplicate map key: {0}")]
+    DuplicateMapKey(String),
+
+    /// Input derivation has two outputs with the same name
+    #[error("duplicate output name {1} for input derivation {0}")]
+    DuplicateInputDerivationOutputName(String, String),
+
+    #[error("duplicate input source: {0}")]
+    DuplicateInputSource(String),
+
+    #[error("nix hash error: {0}")]
+    NixHashError(nixhash::Error),
+
+    #[error("nom error: {0:?}")]
+    Nom(nom::error::ErrorKind),
+}
+
+/// Our own error type to pass along parser-related errors.
+#[derive(Debug, PartialEq)]
+pub struct NomError<I> {
+    /// position of the error in the input data
+    pub input: I,
+    /// error code
+    pub code: ErrorKind,
+}
+
+impl<I, E> nom::error::FromExternalError<I, E> for NomError<I> {
+    fn from_external_error(input: I, kind: nom::error::ErrorKind, _e: E) -> Self {
+        Self {
+            input,
+            code: ErrorKind::Nom(kind),
+        }
+    }
+}
+
+impl<I> nom::error::ParseError<I> for NomError<I> {
+    fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self {
+        Self {
+            input,
+            code: ErrorKind::Nom(kind),
+        }
+    }
+
+    // FUTUREWORK: implement, so we have support for backtracking through the
+    // parse tree?
+    fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self {
+        other
+    }
+}
+
+/// This wraps a [nom::error::Error] into our error.
+impl<I> From<nom::error::Error<I>> for NomError<I> {
+    fn from(value: nom::error::Error<I>) -> Self {
+        Self {
+            input: value.input,
+            code: ErrorKind::Nom(value.code),
+        }
+    }
+}
+
+/// This essentially implements
+/// `From<nom::Err<nom::error::Error<I>>>` for `nom::Err<NomError<I>>`,
+/// which we can't because `nom::Err<_>` is a foreign type.
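+///
+/// Typical usage, as seen throughout the parsers below:
+/// `aterm::parse_str_list(i).map_err(into_nomerror)`.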
+pub(crate) fn into_nomerror<I>(e: nom::Err<nom::error::Error<I>>) -> nom::Err<NomError<I>> {
+    match e {
+        nom::Err::Incomplete(n) => nom::Err::Incomplete(n),
+        nom::Err::Error(e) => nom::Err::Error(e.into()),
+        nom::Err::Failure(e) => nom::Err::Failure(e.into()),
+    }
+}
diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs
new file mode 100644
index 000000000000..7ffa6fd46eb6
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/parser.rs
@@ -0,0 +1,510 @@
+//! This module constructs a [Derivation] by parsing its [ATerm][]
+//! serialization.
+//!
+//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
+
+use bstr::BString;
+use nom::bytes::complete::tag;
+use nom::character::complete::char as nomchar;
+use nom::combinator::{all_consuming, map_res};
+use nom::multi::{separated_list0, separated_list1};
+use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple};
+use std::collections::{btree_map, BTreeMap, BTreeSet};
+use thiserror;
+
+use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult};
+use crate::derivation::{write, CAHash, Derivation, Output};
+use crate::{aterm, nixhash};
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error<I> {
+    #[error("parsing error: {0}")]
+    Parser(NomError<I>),
+    #[error("premature EOF")]
+    Incomplete,
+    #[error("validation error: {0}")]
+    Validation(super::DerivationError),
+}
+
+pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> {
+    match all_consuming(parse_derivation)(i) {
+        Ok((rest, derivation)) => {
+            // this shouldn't happen, as all_consuming shouldn't return.
+            debug_assert!(rest.is_empty());
+
+            // invoke validate
+            derivation.validate(true).map_err(Error::Validation)?;
+
+            Ok(derivation)
+        }
+        Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete),
+        Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(Error::Parser(e)),
+    }
+}
+
+/// Consume a string containing the algo (optionally prefixed with `r:`)
+/// and a digest (bytes), and return a [CAHash::Nar] or [CAHash::Flat].
+fn from_algo_and_mode_and_digest<B: AsRef<[u8]>>(
+    algo_and_mode: &str,
+    digest: B,
+) -> crate::nixhash::Result<CAHash> {
+    Ok(match algo_and_mode.strip_prefix("r:") {
+        Some(algo) => nixhash::CAHash::Nar(nixhash::from_algo_and_digest(
+            algo.try_into()?,
+            digest.as_ref(),
+        )?),
+        None => nixhash::CAHash::Flat(nixhash::from_algo_and_digest(
+            algo_and_mode.try_into()?,
+            digest.as_ref(),
+        )?),
+    })
+}
+
+/// Parse one output in ATerm. This is 4 string fields inside parens:
+/// output name, output path, algo (and mode), digest.
+/// Returns the output name and [Output] struct.
+fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> {
+    delimited(
+        nomchar('('),
+        map_res(
+            |i| {
+                tuple((
+                    terminated(aterm::parse_string_field, nomchar(',')),
+                    terminated(aterm::parse_string_field, nomchar(',')),
+                    terminated(aterm::parse_string_field, nomchar(',')),
+                    aterm::parse_bstr_field,
+                ))(i)
+                .map_err(into_nomerror)
+            },
+            |(output_name, output_path, algo_and_mode, encoded_digest)| {
+                // convert these 4 fields into an [Output].
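+                // As concrete examples (taken from the .drv test fixtures
+                // below), a fixed-output output looks like
+                //   ("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbe…"),
+                // while an input-addressed output leaves algo and digest empty:
+                //   ("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")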
+                let ca_hash_res = {
+                    if algo_and_mode.is_empty() && encoded_digest.is_empty() {
+                        None
+                    } else {
+                        match data_encoding::HEXLOWER.decode(&encoded_digest) {
+                            Ok(digest) => {
+                                Some(from_algo_and_mode_and_digest(&algo_and_mode, digest))
+                            }
+                            Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))),
+                        }
+                    }
+                }
+                .transpose();
+
+                match ca_hash_res {
+                    Ok(hash_with_mode) => Ok((
+                        output_name,
+                        Output {
+                            path: output_path,
+                            ca_hash: hash_with_mode,
+                        },
+                    )),
+                    Err(e) => Err(nom::Err::Failure(NomError {
+                        input: i,
+                        code: ErrorKind::NixHashError(e),
+                    })),
+                }
+            },
+        ),
+        nomchar(')'),
+    )(i)
+}
+
+/// Parse multiple outputs in ATerm. This is a list of things accepted by
+/// parse_output, and takes care of turning the (String, Output) returned from
+/// it into a BTreeMap.
+/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are
+/// 4-tuples.
+fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> {
+    let res = delimited(
+        nomchar('['),
+        separated_list1(tag(","), parse_output),
+        nomchar(']'),
+    )(i);
+
+    match res {
+        Ok((rst, outputs_lst)) => {
+            let mut outputs: BTreeMap<String, Output> = BTreeMap::default();
+            for (output_name, output) in outputs_lst.into_iter() {
+                if outputs.contains_key(&output_name) {
+                    return Err(nom::Err::Failure(NomError {
+                        input: i,
+                        code: ErrorKind::DuplicateMapKey(output_name),
+                    }));
+                }
+                outputs.insert(output_name, output);
+            }
+            Ok((rst, outputs))
+        }
+        // pass regular parse errors along
+        Err(e) => Err(e),
+    }
+}
+
+fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, BTreeSet<String>>> {
+    let (i, input_derivations_list) = parse_kv::<Vec<String>, _>(aterm::parse_str_list)(i)?;
+
+    // This is a HashMap of drv paths to a list of output names.
+    let mut input_derivations: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
+
+    for (input_derivation, output_names) in input_derivations_list {
+        let mut new_output_names = BTreeSet::new();
+        for output_name in output_names.into_iter() {
+            if new_output_names.contains(&output_name) {
+                return Err(nom::Err::Failure(NomError {
+                    input: i,
+                    code: ErrorKind::DuplicateInputDerivationOutputName(
+                        input_derivation.to_string(),
+                        output_name.to_string(),
+                    ),
+                }));
+            } else {
+                new_output_names.insert(output_name);
+            }
+        }
+        input_derivations.insert(input_derivation, new_output_names);
+    }
+
+    Ok((i, input_derivations))
+}
+
+fn parse_input_sources(i: &[u8]) -> NomResult<&[u8], BTreeSet<String>> {
+    let (i, input_sources_lst) = aterm::parse_str_list(i).map_err(into_nomerror)?;
+
+    let mut input_sources: BTreeSet<_> = BTreeSet::new();
+    for input_source in input_sources_lst.into_iter() {
+        if input_sources.contains(&input_source) {
+            return Err(nom::Err::Failure(NomError {
+                input: i,
+                code: ErrorKind::DuplicateInputSource(input_source),
+            }));
+        } else {
+            input_sources.insert(input_source);
+        }
+    }
+
+    Ok((i, input_sources))
+}
+
+pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> {
+    use nom::Parser;
+    preceded(
+        tag(write::DERIVATION_PREFIX),
+        delimited(
+            // inside parens
+            nomchar('('),
+            // tuple requires all errors to be of the same type, so we need to be a
+            // bit verbose here wrapping generic IResult into [NomResult].
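+            // The overall layout parsed by the tuple below mirrors the .drv
+            // fixtures:
+            //   Derive([outputs],[inputDrvs],[inputSrcs],"system","builder",[args],[env])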
+ tuple(( + // parse outputs + terminated(parse_outputs, nomchar(',')), + // // parse input derivations + terminated(parse_input_derivations, nomchar(',')), + // // parse input sources + terminated(parse_input_sources, nomchar(',')), + // // parse system + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse builder + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse arguments + |i| terminated(aterm::parse_str_list, nomchar(','))(i).map_err(into_nomerror), + // parse environment + parse_kv::<BString, _>(aterm::parse_bstr_field), + )), + nomchar(')'), + ) + .map( + |( + outputs, + input_derivations, + input_sources, + system, + builder, + arguments, + environment, + )| { + Derivation { + arguments, + builder, + environment, + input_derivations, + input_sources, + outputs, + system, + } + }, + ), + )(i) +} + +/// Parse a list of key/value pairs into a BTreeMap. +/// The parser for the values can be passed in. +/// In terms of ATerm, this is just a 2-tuple, +/// but we have the additional restriction that the first element needs to be +/// unique across all tuples. +pub(crate) fn parse_kv<'a, V, VF>( + vf: VF, +) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap<String, V>> + 'static +where + VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static, +{ + move |i| + // inside brackets + delimited( + nomchar('['), + |ii| { + let res = separated_list0( + nomchar(','), + // inside parens + delimited( + nomchar('('), + separated_pair( + aterm::parse_string_field, + nomchar(','), + vf.clone(), + ), + nomchar(')'), + ), + )(ii).map_err(into_nomerror); + + match res { + Ok((rest, pairs)) => { + let mut kvs: BTreeMap<String, V> = BTreeMap::new(); + for (k, v) in pairs.into_iter() { + // collect the 2-tuple to a BTreeMap, + // and fail if the key was already seen before. + match kvs.entry(k) { + btree_map::Entry::Vacant(e) => { e.insert(v); }, + btree_map::Entry::Occupied(e) => { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(e.key().clone()), + })); + } + } + } + Ok((rest, kvs)) + } + Err(e) => Err(e), + } + }, + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeMap, BTreeSet}; + + use crate::derivation::{ + parse_error::ErrorKind, parser::from_algo_and_mode_and_digest, CAHash, NixHash, Output, + }; + use bstr::{BString, ByteSlice}; + use hex_literal::hex; + use lazy_static::lazy_static; + use test_case::test_case; + const DIGEST_SHA256: [u8; 32] = + hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"); + + lazy_static! 
{ + pub static ref NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256); + static ref EXP_MULTI_OUTPUTS: BTreeMap<String, Output> = { + let mut b = BTreeMap::new(); + b.insert( + "lib".to_string(), + Output { + path: "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + .to_string(), + ca_hash: None, + }, + ); + b.insert( + "out".to_string(), + Output { + path: "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string(), + ca_hash: None, + }, + ); + b + }; + static ref EXP_AB_MAP: BTreeMap<String, BString> = { + let mut b = BTreeMap::new(); + b.insert("a".to_string(), b"1".as_bstr().to_owned()); + b.insert("b".to_string(), b"2".as_bstr().to_owned()); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE: BTreeMap<String, BTreeSet<String>> = { + let mut b = BTreeMap::new(); + b.insert( + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv".to_string(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names + }, + ); + b.insert( + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names.insert("lib".to_string()); + output_names + }, + ); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE_ATERM: String = { + format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"lib\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ) + }; + static ref EXP_INPUT_SOURCES_SIMPLE: BTreeSet<String> = { + let mut b = BTreeSet::new(); + b.insert("/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string()); + b.insert("/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib".to_string()); + b + }; + } + + /// Ensure parsing KVs works + #[test_case(b"[]", &BTreeMap::new(), b""; "empty")] + #[test_case(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b""; "simple")] + fn parse_kv(input: &'static [u8], expected: &BTreeMap<String, BString>, exp_rest: &[u8]) { + let (rest, parsed) = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(*expected, parsed); + } + + /// Ensures the kv parser complains about duplicate map keys + #[test] + fn parse_kv_fail_dup_keys() { + let input: &'static [u8] = b"[(\"a\",\"1\"),(\"a\",\"2\")]"; + let e = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!(ErrorKind::DuplicateMapKey("a".to_string()), e.code); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input derivations works. 
+ #[test_case(b"[]", &BTreeMap::new(); "empty")] + #[test_case(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE; "simple")] + fn parse_input_derivations( + input: &'static [u8], + expected: &BTreeMap<String, BTreeSet<String>>, + ) { + let (rest, parsed) = super::parse_input_derivations(input).expect("must parse"); + + assert_eq!(expected, &parsed, "parsed mismatch"); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input derivation parser complains about duplicate output names + #[test] + fn parse_input_derivations_fail_dup_output_names() { + let input_str = format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"out\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ); + let e = super::parse_input_derivations(input_str.as_bytes()).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputDerivationOutputName( + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + "out".to_string() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input sources works + #[test_case(b"[]", &BTreeSet::new(); "empty")] + #[test_case(b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out\",\"/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib\"]", &EXP_INPUT_SOURCES_SIMPLE; "simple")] + fn parse_input_sources(input: &'static [u8], expected: &BTreeSet<String>) { + let (rest, parsed) = super::parse_input_sources(input).expect("must parse"); + + assert_eq!(expected, &parsed, "parsed mismatch"); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input sources parser complains about duplicate input sources + #[test] + fn parse_input_sources_fail_dup_keys() { + let input: &'static [u8] = b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\",\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\"]"; + let e = super::parse_input_sources(input).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputSource( + "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo".to_string() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + #[test_case( + br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#, + ("out".to_string(), Output { + path: "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".to_string(), + ca_hash: None + }); "simple" + )] + #[test_case( + br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#, + ("out".to_string(), Output { + path: "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".to_string(), + ca_hash: Some(from_algo_and_mode_and_digest("r:sha256", + data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() ).unwrap()), + }); "fod" + )] + fn parse_output(input: &[u8], expected: (String, Output)) { + let (rest, parsed) = super::parse_output(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(expected, parsed); + } + + #[test_case( + br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#, + &EXP_MULTI_OUTPUTS; + "multi-out" + )] + fn parse_outputs(input: &[u8], expected: &BTreeMap<String, Output>) { + let (rest, parsed) = super::parse_outputs(input).expect("must parse"); + 
assert!(rest.is_empty()); + assert_eq!(*expected, parsed); + } + + #[test_case("sha256", &DIGEST_SHA256, CAHash::Flat(NIXHASH_SHA256.clone()); "sha256 flat")] + #[test_case("r:sha256", &DIGEST_SHA256, CAHash::Nar(NIXHASH_SHA256.clone()); "sha256 recursive")] + fn test_from_algo_and_mode_and_digest(algo_and_mode: &str, digest: &[u8], expected: CAHash) { + assert_eq!( + expected, + from_algo_and_mode_and_digest(algo_and_mode, digest).unwrap() + ); + } + + #[test] + fn from_algo_and_mode_and_digest_failure() { + assert!(from_algo_and_mode_and_digest("r:sha256", []).is_err()); + assert!(from_algo_and_mode_and_digest("ha256", DIGEST_SHA256).is_err()); + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv new file mode 100644 index 000000000000..072561a29e3a --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("name","bar"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv new file mode 100644 index 000000000000..a4fea3c5f486 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("outputHash","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json new file mode 100644 index 000000000000..c8bbc4cbb5be --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "outputHash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256", + "path": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv new file mode 100644 index 000000000000..f0d9230a5a52 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv 
@@ -0,0 +1 @@ +Derive([("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json","","")],[],[],":",":",[],[("builder",":"),("json","{\"hello\":\"moto\\n\"}"),("name","nested-json"),("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json new file mode 100644 index 000000000000..9cb0b43b4c09 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json @@ -0,0 +1,19 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "json": "{\"hello\":\"moto\\n\"}", + "name": "nested-json", + "out": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv new file mode 100644 index 000000000000..a2cf9d31f92e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json new file mode 100644 index 000000000000..957a85ccab82 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv new file mode 100644 index 000000000000..bbe88c02c739 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode","","")],[],[],":",":",[],[("builder",":"),("letters","räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 今日は\n🌮\n"),("name","unicode"),("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode"),("system",":")]) \ No newline at end of file diff --git 
a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json new file mode 100644 index 000000000000..f8f33c1bba17 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json @@ -0,0 +1,19 @@ +{ + "outputs": { + "out": { + "path": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "letters": "räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 今日は\n🌮\n", + "name": "unicode", + "out": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode", + "system": ":" + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv new file mode 100644 index 000000000000..4b9338c0b953 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs","","")],[],[],":",":",[],[("__json","{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}"),("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json new file mode 100644 index 000000000000..74e3d7df55c5 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json @@ -0,0 +1,16 @@ +{ + "args": [], + "builder": ":", + "env": { + "__json": "{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}", + "out": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv new file mode 100644 index 000000000000..1699c2a75e48 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo","","")],[("/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("builder",":"),("name","foo"),("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json new file mode 100644 index 000000000000..831d27956d86 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": 
":", + "env": { + "bar": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv new file mode 100644 index 000000000000..523612238c76 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv @@ -0,0 +1 @@ +Derive([("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")],[],[],":",":",[],[("builder",":"),("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib"),("name","has-multi-out"),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out"),("outputs","out lib"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json new file mode 100644 index 000000000000..0bd7a2991cc7 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "lib": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib", + "name": "has-multi-out", + "out": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out", + "outputs": "out lib", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "lib": { + "path": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + }, + "out": { + "path": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv new file mode 100644 index 000000000000..6a7a35c58c3f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","cp1252"),("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json new file mode 100644 index 000000000000..9d6ba8b7977f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv": { + "outputs": { + "out": { + "path": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + 
"builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "cp1252", + "out": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv new file mode 100644 index 000000000000..559e93ed0ed6 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar","r:sha1","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("outputHash","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),("outputHashAlgo","sha1"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json new file mode 100644 index 000000000000..e297d271592f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "outputHash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "outputHashAlgo": "sha1", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "hashAlgo": "r:sha1", + "path": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv new file mode 100644 index 000000000000..b19fd8eb2ce4 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","latin1"),("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json new file mode 100644 index 000000000000..ffd5c08da830 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv": { + "outputs": { + "out": { + "path": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "latin1", + "out": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs new file mode 100644 index 000000000000..168e11d46f1e --- /dev/null +++ 
b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -0,0 +1,416 @@ +use super::parse_error::ErrorKind; +use crate::derivation::output::Output; +use crate::derivation::parse_error::NomError; +use crate::derivation::parser::Error; +use crate::derivation::Derivation; +use crate::store_path::StorePath; +use bstr::{BStr, BString}; +use std::collections::BTreeSet; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::str::FromStr; +use test_case::test_case; +use test_generator::test_resources; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; + +fn read_file(path: &str) -> BString { + let path = Path::new(path); + let mut file = File::open(path).unwrap(); + let mut file_contents = Vec::new(); + + file.read_to_end(&mut file_contents).unwrap(); + + file_contents.into() +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn check_serialization(path_to_drv_file: &str) { + // skip derivations whose JSON fixtures are known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let mut serialized_derivation = Vec::new(); + derivation.serialize(&mut serialized_derivation).unwrap(); + + let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&serialized_derivation)); +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn validate(path_to_drv_file: &str) { + // skip derivations whose JSON fixtures are known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + derivation + .validate(true) + .expect("derivation failed to validate") +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn check_to_aterm_bytes(path_to_drv_file: &str) { + // skip derivations whose JSON fixtures are known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); +} + +/// Reads in derivations in ATerm representation, parses them with the ATerm +/// parser, then compares the structs with the ones obtained by parsing the +/// JSON representations. +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn from_aterm_bytes(path_to_drv_file: &str) { + // Read in ATerm representation. + let aterm_bytes = read_file(path_to_drv_file); + let parsed_drv = Derivation::from_aterm_bytes(&aterm_bytes).expect("must succeed"); + + // Where we're able to load JSON fixtures, parse them and compare the structs. + // Where we're not (the non-UTF-8 fixtures), compare the bytes manually.
+ if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + assert_eq!( + &[0xc5, 0xc4, 0xd6][..], + parsed_drv.environment.get("chars").unwrap(), + "expected bytes to match", + ); + } else { + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let fixture_derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!(fixture_derivation, parsed_drv); + } + + // Finally, write the ATerm serialization to another buffer, ensuring it's + // stable (and we compare all fields we couldn't compare in the non-utf8 + // derivations) + + assert_eq!( + &aterm_bytes, + &parsed_drv.to_aterm_bytes(), + "expected serialized ATerm to match initial input" + ); +} + +#[test] +fn from_aterm_bytes_duplicate_map_key() { + let buf: Vec<u8> = read_file(&format!("{}/{}", RESOURCES_PATHS, "duplicate.drv")).into(); + + let err = Derivation::from_aterm_bytes(&buf).expect_err("must fail"); + + match err { + Error::Parser(NomError { input: _, code }) => { + assert_eq!(code, ErrorKind::DuplicateMapKey("name".to_string())); + } + _ => { + panic!("unexpected error"); + } + } +} + +/// Read in a derivation in ATerm, but add some garbage at the end. +/// Ensure the parser detects and fails in this case. +#[test] +fn from_aterm_bytes_trailer() { + let mut buf: Vec<u8> = read_file(&format!( + "{}/ok/{}", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )) + .into(); + + buf.push(0x00); + + Derivation::from_aterm_bytes(&buf).expect_err("must fail"); +} + +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +fn derivation_path(name: &str, expected_path: &str) { + let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, expected_path)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!( + derivation.calculate_derivation_path(name).unwrap(), + StorePath::from_str(expected_path).unwrap() + ); +} + +/// This trims all output paths from a Derivation struct, +/// by setting outputs[$outputName].path and environment[$outputName] to the empty string. 
+fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { + let mut trimmed_env = derivation.environment.clone(); + let mut trimmed_outputs = derivation.outputs.clone(); + + for (output_name, output) in &derivation.outputs { + trimmed_env.insert(output_name.clone(), "".into()); + assert!(trimmed_outputs.contains_key(output_name)); + trimmed_outputs.insert( + output_name.to_string(), + Output { + path: "".to_string(), + ..output.clone() + }, + ); + } + + // replace environment and outputs with the trimmed variants + Derivation { + environment: trimmed_env, + outputs: trimmed_outputs, + ..derivation.clone() + } +} + +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"; "fixed-sha1")] +fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { + // read in the fixture + let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)); + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); + + assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string()); +} + +/// This reads a Derivation (in ATerm), trims out all fields containing +/// calculated output paths, then triggers the output path calculation and +/// checks that the resulting struct matches what was originally read in. +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +#[test_case("cp1252", "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv"; "cp1252")] +#[test_case("latin1", "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv"; "latin1")] +fn output_paths(name: &str, drv_path_str: &str) { + // read in the derivation + let expected_derivation = Derivation::from_aterm_bytes( + read_file(&format!("{}/ok/{}", RESOURCES_PATHS, drv_path_str)).as_ref(), + ) + .expect("must succeed"); + + // create a version with trimmed output paths, simulating that we + // constructed the struct ourselves. + let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); + + // calculate the derivation_or_fod_hash of the derivation. + // We don't expect the lookup function to be called for most derivations.
+ let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { + // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may look up /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv + // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may look up /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv + if name == "foo" + && ((drv_path_str == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" + && parent_drv_path.to_string() == "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + || (drv_path_str == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" + && parent_drv_path.to_string() == "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + { + // Do the lookup by reading in the fixture of the requested + // drv_name and calculating its drv replacement (on the non-stripped version). + // In a real-world scenario, you would have already done this during construction. + + let json_bytes = read_file(&format!( + "{}/ok/{}.json", + RESOURCES_PATHS, + Path::new(&parent_drv_path.to_string()) + .file_name() + .unwrap() + .to_string_lossy() + )); + + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + // Calculate derivation_or_fod_hash for each parent. + // This must not trigger subsequent lookups, as both parents are fixed-output derivations. + drv.derivation_or_fod_hash(|_| panic!("must not lookup")) + } else { + // we only expect this to be called in the "foo" test cases, for the "bar" derivations + panic!("may only be called for foo testcase on bar derivations"); + } + }); + + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + // The derivation should now look like it did before trimming. + assert_eq!(expected_derivation, derivation); +} + +/// Exercises the output path calculation functions like a constructing client +/// (an implementation of builtins.derivation) would do: +/// +/// ```nix +/// rec { +/// bar = builtins.derivation { +/// name = "bar"; +/// builder = ":"; +/// system = ":"; +/// outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; +/// outputHashAlgo = "sha256"; +/// outputHashMode = "recursive"; +/// }; +/// +/// foo = builtins.derivation { +/// name = "foo"; +/// builder = ":"; +/// system = ":"; +/// inherit bar; +/// }; +/// } +/// ``` +/// It first assembles the bar derivation, does the output path calculation on +/// it, then continues with the foo derivation. +/// +/// The code ensures the resulting Derivations match our fixtures.
+#[test] +fn output_path_construction() { + // create the bar derivation + let mut bar_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble bar env + let bar_env = &mut bar_drv.environment; + bar_env.insert("builder".to_string(), ":".into()); + bar_env.insert("name".to_string(), "bar".into()); + bar_env.insert("out".to_string(), "".into()); // will be calculated + bar_env.insert( + "outputHash".to_string(), + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(), + ); + bar_env.insert("outputHashAlgo".to_string(), "sha256".into()); + bar_env.insert("outputHashMode".to_string(), "recursive".into()); + bar_env.insert("system".to_string(), ":".into()); + + // assemble bar outputs + bar_drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), // will be calculated + ca_hash: Some(crate::nixhash::CAHash::Nar( + crate::nixhash::from_algo_and_digest( + crate::nixhash::HashAlgo::Sha256, + &data_encoding::HEXLOWER + .decode( + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + .as_bytes(), + ) + .unwrap(), + ) + .unwrap(), + )), + }, + ); + + // calculate bar output paths + let bar_calc_result = bar_drv.calculate_output_paths( + "bar", + &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")), + ); + assert!(bar_calc_result.is_ok()); + + // ensure it matches our bar fixture + let bar_json_bytes = read_file(&format!( + "{}/ok/{}.json", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )); + let bar_drv_expected: Derivation = + serde_json::from_slice(&bar_json_bytes).expect("must deserialize"); + assert_eq!(bar_drv_expected, bar_drv); + + // now construct foo, which requires bar_drv + // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. 
+ let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; + let bar_drv_derivation_or_fod_hash = + bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")); + + let bar_drv_path = bar_drv + .calculate_derivation_path("bar") + .expect("must succeed"); + + // create foo derivation + let mut foo_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble foo env + let foo_env = &mut foo_drv.environment; + foo_env.insert("bar".to_string(), bar_output_path.to_owned().into()); + foo_env.insert("builder".to_string(), ":".into()); + foo_env.insert("name".to_string(), "foo".into()); + foo_env.insert("out".to_string(), "".into()); // will be calculated + foo_env.insert("system".to_string(), ":".into()); + + // assemble foo outputs + foo_drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), // will be calculated + ca_hash: None, + }, + ); + + // assemble foo input_derivations + foo_drv.input_derivations.insert( + bar_drv_path.to_absolute_path(), + BTreeSet::from(["out".to_string()]), + ); + + // calculate foo output paths + let foo_calc_result = foo_drv.calculate_output_paths( + "foo", + &foo_drv.derivation_or_fod_hash(|drv_path| { + if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { + panic!("lookup called with unexpected drv_path: {}", drv_path); + } + bar_drv_derivation_or_fod_hash.clone() + }), + ); + assert!(foo_calc_result.is_ok()); + + // ensure it matches our foo fixture + let foo_json_bytes = read_file(&format!( + "{}/ok/{}.json", + RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", + )); + let foo_drv_expected: Derivation = + serde_json::from_slice(&foo_json_bytes).expect("must deserialize"); + assert_eq!(foo_drv_expected, foo_drv); + + assert_eq!( + StorePath::from_str("4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv").expect("must succeed"), + foo_drv + .calculate_derivation_path("foo") + .expect("must succeed") + ); +} diff --git a/tvix/nix-compat/src/derivation/validate.rs b/tvix/nix-compat/src/derivation/validate.rs new file mode 100644 index 000000000000..d711f5ce1de2 --- /dev/null +++ b/tvix/nix-compat/src/derivation/validate.rs @@ -0,0 +1,158 @@ +use crate::derivation::{Derivation, DerivationError}; +use crate::store_path::{self, StorePathRef}; + +impl Derivation { + /// validate ensures a Derivation struct is properly populated, + /// and returns a [DerivationError] if not. + /// + /// If `validate_output_paths` is set to false, the output paths are + /// excluded from validation. + /// + /// This is helpful to validate struct population before invoking + /// [Derivation::calculate_output_paths]. + pub fn validate(&self, validate_output_paths: bool) -> Result<(), DerivationError> { + // Ensure there is at least one output. + if self.outputs.is_empty() { + return Err(DerivationError::NoOutputs()); + } + + // Validate all outputs + for (output_name, output) in &self.outputs { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions from + // [StorePathRef] will fail the [StorePathRef::validate_name] check.
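+ // + // For example, `out` and `lib` are valid output names, while + // the empty string and `drv` are rejected here.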
+ if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidOutputName(output_name.to_string())); + } + + if output.is_fixed() { + if self.outputs.len() != 1 { + return Err(DerivationError::MoreThanOneOutputButFixed()); + } + if output_name != "out" { + return Err(DerivationError::InvalidOutputNameForFixed( + output_name.to_string(), + )); + } + } + + if let Err(e) = output.validate(validate_output_paths) { + return Err(DerivationError::InvalidOutput(output_name.to_string(), e)); + } + } + + // Validate all input_derivations + for (input_derivation_path, output_names) in &self.input_derivations { + // Validate input_derivation_path + if let Err(e) = StorePathRef::from_absolute_path(input_derivation_path.as_bytes()) { + return Err(DerivationError::InvalidInputDerivationPath( + input_derivation_path.to_string(), + e, + )); + } + + if !input_derivation_path.ends_with(".drv") { + return Err(DerivationError::InvalidInputDerivationPrefix( + input_derivation_path.to_string(), + )); + } + + if output_names.is_empty() { + return Err(DerivationError::EmptyInputDerivationOutputNames( + input_derivation_path.to_string(), + )); + } + + for output_name in output_names.iter() { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions from + // [StorePath] will fail the [StorePathRef::validate_name] check. + if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidInputDerivationOutputName( + input_derivation_path.to_string(), + output_name.to_string(), + )); + } + } + } + + // Validate all input_sources + for input_source in self.input_sources.iter() { + if let Err(e) = StorePathRef::from_absolute_path(input_source.as_bytes()) { + return Err(DerivationError::InvalidInputSourcesPath( + input_source.to_string(), + e, + )); + } + } + + // validate platform + if self.system.is_empty() { + return Err(DerivationError::InvalidPlatform(self.system.to_string())); + } + + // validate builder + if self.builder.is_empty() { + return Err(DerivationError::InvalidBuilder(self.builder.to_string())); + } + + // validate env, none of the keys may be empty. + // We skip the `name` validation seen in go-nix. + for k in self.environment.keys() { + if k.is_empty() { + return Err(DerivationError::InvalidEnvironmentKey(k.to_string())); + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::collections::BTreeMap; + + use crate::derivation::{CAHash, Derivation, Output}; + + /// Regression test: produce a Derivation that's almost valid, except its + /// fixed-output output has the wrong hash specified. 
+ #[test] + fn output_validate() { + let mut outputs = BTreeMap::new(); + outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), + ca_hash: Some(CAHash::Text([0; 32])), // This is disallowed + }, + ); + + let drv = Derivation { + arguments: vec![], + builder: "/bin/sh".to_string(), + outputs, + system: "x86_64-linux".to_string(), + ..Default::default() + }; + + drv.validate(false).expect_err("must fail"); + } +} diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs new file mode 100644 index 000000000000..0981dfccae22 --- /dev/null +++ b/tvix/nix-compat/src/derivation/write.rs @@ -0,0 +1,196 @@ +//! This module implements the serialisation of derivations into the +//! [ATerm][] format used by C++ Nix. +//! +//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use crate::aterm::escape_bytes; +use crate::derivation::{ca_kind_prefix, output::Output}; +use bstr::BString; +use std::{ + collections::{BTreeMap, BTreeSet}, + io, + io::Error, + io::Write, +}; + +pub const DERIVATION_PREFIX: &str = "Derive"; +pub const PAREN_OPEN: char = '('; +pub const PAREN_CLOSE: char = ')'; +pub const BRACKET_OPEN: char = '['; +pub const BRACKET_CLOSE: char = ']'; +pub const COMMA: char = ','; +pub const QUOTE: char = '"'; + +// Writes a character to the writer. +pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { + let mut buf = [0; 4]; + let b = c.encode_utf8(&mut buf).as_bytes(); + writer.write_all(b) } + +// Write a string `s` as a quoted field to the writer. +// The `escape` argument controls whether `s` will be escaped; it may be set +// to false if `s` is known to only contain characters that need no +// escaping. +pub(crate) fn write_field<S: AsRef<[u8]>>( + writer: &mut impl Write, + s: S, + escape: bool, +) -> io::Result<()> { + write_char(writer, QUOTE)?; + + if !escape { + writer.write_all(s.as_ref())?; + } else { + writer.write_all(&escape_bytes(s.as_ref()))?; + } + + write_char(writer, QUOTE)?; + + Ok(()) +} + +fn write_array_elements<S: AsRef<[u8]>>( + writer: &mut impl Write, + elements: &[S], +) -> Result<(), io::Error> { + for (index, element) in elements.iter().enumerate() { + if index > 0 { + write_char(writer, COMMA)?; + } + + write_field(writer, element, true)?; + } + + Ok(()) +} + +pub fn write_outputs( + writer: &mut impl Write, + outputs: &BTreeMap<String, Output>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + for (ii, (output_name, output)) in outputs.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + + let mut elements: Vec<&str> = vec![output_name, &output.path]; + + let (mode_and_algo, digest) = match &output.ca_hash { + Some(ca_hash) => ( + format!("{}{}", ca_kind_prefix(ca_hash), ca_hash.digest().algo()), + data_encoding::HEXLOWER.encode(ca_hash.digest().digest_as_bytes()), + ), + None => ("".to_string(), "".to_string()), + }; + + elements.push(&mode_and_algo); + elements.push(&digest); + + write_array_elements(writer, &elements)?; + + write_char(writer, PAREN_CLOSE)?; + } + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_input_derivations( + writer: &mut impl Write, + input_derivations: &BTreeMap<String, BTreeSet<String>>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + + for (ii, (input_derivation, output_names)) in input_derivations.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + write_field(writer, 
input_derivation.as_str(), false)?; + write_char(writer, COMMA)?; + + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &output_names + .iter() + .map(String::as_bytes) + .collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_input_sources( + writer: &mut impl Write, + input_sources: &BTreeSet<String>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &input_sources.iter().map(String::from).collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> { + write_field(writer, platform, true)?; + Ok(()) +} + +pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> { + write_field(writer, builder, true)?; + Ok(()) +} + +pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &arguments + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_environment<E, K, V>(writer: &mut impl Write, environment: E) -> Result<(), io::Error> +where + E: IntoIterator<Item = (K, V)>, + K: AsRef<[u8]>, + V: AsRef<[u8]>, +{ + write_char(writer, BRACKET_OPEN)?; + + for (i, (k, v)) in environment.into_iter().enumerate() { + if i > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + write_field(writer, k, false)?; + write_char(writer, COMMA)?; + write_field(writer, v, true)?; + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs new file mode 100644 index 000000000000..dd161cc1f944 --- /dev/null +++ b/tvix/nix-compat/src/lib.rs @@ -0,0 +1,7 @@ +pub(crate) mod aterm; +pub mod derivation; +pub mod nar; +pub mod narinfo; +pub mod nixbase32; +pub mod nixhash; +pub mod store_path; diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs new file mode 100644 index 000000000000..058977f4fcd1 --- /dev/null +++ b/tvix/nix-compat/src/nar/mod.rs @@ -0,0 +1,4 @@ +mod wire; + +pub mod reader; +pub mod writer; diff --git a/tvix/nix-compat/src/nar/reader/mod.rs b/tvix/nix-compat/src/nar/reader/mod.rs new file mode 100644 index 000000000000..fa7ddc77f96c --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/mod.rs @@ -0,0 +1,293 @@ +//! Parser for the Nix archive format, aka NAR. +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! a variety of things, including addressing fixed-output derivations +//! and transferring store paths between Nix stores. + +use std::io::{ + self, BufRead, + ErrorKind::{InvalidData, UnexpectedEof}, + Read, Write, +}; + +// Required reading for understanding this module. +use crate::nar::wire; + +mod read; +#[cfg(test)] +mod test; + +pub type Reader<'a> = dyn BufRead + Send + 'a; + +/// Start reading a NAR file from `reader`. 
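+/// +/// A minimal usage sketch (`nar_bytes` is a hypothetical buffer holding a +/// complete NAR; error handling elided): +/// +/// ```ignore +/// let mut buf = std::io::Cursor::new(nar_bytes); +/// match nix_compat::nar::reader::open(&mut buf)? { +///     Node::Symlink { target } => { /* … */ } +///     Node::File { executable, mut reader } => { /* stream the contents */ } +///     Node::Directory(mut dir) => { /* call dir.next() until it yields None */ } +/// } +/// ```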
+pub fn open<'a, 'r>(reader: &'a mut Reader<'r>) -> io::Result<Node<'a, 'r>> { + read::token(reader, &wire::TOK_NAR)?; + Node::new(reader) +} + +pub enum Node<'a, 'r> { + Symlink { + target: Vec<u8>, + }, + File { + executable: bool, + reader: FileReader<'a, 'r>, + }, + Directory(DirReader<'a, 'r>), +} + +impl<'a, 'r> Node<'a, 'r> { + /// Start reading a [Node], matching the next [wire::Node]. + /// + /// Reading the terminating [wire::TOK_PAR] is done immediately for [Node::Symlink], + /// but is otherwise left to [DirReader] or [FileReader]. + fn new(reader: &'a mut Reader<'r>) -> io::Result<Self> { + Ok(match read::tag(reader)? { + wire::Node::Sym => { + let target = read::bytes(reader, wire::MAX_TARGET_LEN)?; + + if target.is_empty() || target.contains(&0) { + return Err(InvalidData.into()); + } + + read::token(reader, &wire::TOK_PAR)?; + + Node::Symlink { target } + } + tag @ (wire::Node::Reg | wire::Node::Exe) => { + let len = read::u64(reader)?; + + Node::File { + executable: tag == wire::Node::Exe, + reader: FileReader::new(reader, len)?, + } + } + wire::Node::Dir => Node::Directory(DirReader::new(reader)), + }) + } +} + +/// File contents, readable through the [Read] trait. +/// +/// It comes with some caveats: +/// * You must always read the entire file, unless you intend to abandon the entire archive reader. +/// * You must abandon the entire archive reader upon the first error. +/// +/// It's fine to read exactly `reader.len()` bytes without ever seeing an explicit EOF. +/// +/// TODO(edef): enforce these in `#[cfg(debug_assertions)]` +pub struct FileReader<'a, 'r> { + reader: &'a mut Reader<'r>, + len: u64, + /// Truncated original file length for padding computation. + /// We only care about the 3 least significant bits; semantically, this is a u3. + pad: u8, +} + +impl<'a, 'r> FileReader<'a, 'r> { + /// Instantiate a new reader, starting after [wire::TOK_REG] or [wire::TOK_EXE]. + /// We handle the terminating [wire::TOK_PAR] on semantic EOF. + fn new(reader: &'a mut Reader<'r>, len: u64) -> io::Result<Self> { + // For zero-length files, we have to read the terminating TOK_PAR + // immediately, since FileReader::read may never be called; we've + // already reached semantic EOF by definition. + if len == 0 { + read::token(reader, &wire::TOK_PAR)?; + } + + Ok(Self { + reader, + len, + pad: len as u8, + }) + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + pub fn len(&self) -> u64 { + self.len + } +} + +impl FileReader<'_, '_> { + /// Equivalent to [BufRead::fill_buf] + /// + /// We can't directly implement [BufRead], because [FileReader::consume] needs + /// to perform fallible I/O. + pub fn fill_buf(&mut self) -> io::Result<&[u8]> { + if self.is_empty() { + return Ok(&[]); + } + + let mut buf = self.reader.fill_buf()?; + + if buf.is_empty() { + return Err(UnexpectedEof.into()); + } + + if buf.len() as u64 > self.len { + buf = &buf[..self.len as usize]; + } + + Ok(buf) + } + + /// Analogous to [BufRead::consume], differing only in that it needs + /// to perform I/O in order to read padding and terminators. + pub fn consume(&mut self, n: usize) -> io::Result<()> { + if n == 0 { + return Ok(()); + } + + self.len = self + .len + .checked_sub(n as u64) + .expect("consumed bytes past EOF"); + + self.reader.consume(n); + + if self.is_empty() { + self.finish()?; + } + + Ok(()) + } + + /// Copy the (remaining) contents of the file into `dst`. 
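+ /// + /// A minimal sketch (assuming `file` is the reader taken out of a + /// [Node::File]): + /// + /// ```ignore + /// let mut contents = Vec::new(); + /// file.copy(&mut contents)?; + /// assert!(file.is_empty()); // the whole file has now been consumed + /// ```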
+ pub fn copy(&mut self, mut dst: impl Write) -> io::Result<()> { + while !self.is_empty() { + let buf = self.fill_buf()?; + let n = dst.write(buf)?; + self.consume(n)?; + } + + Ok(()) + } +} + +impl Read for FileReader<'_, '_> { + fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> { + if buf.is_empty() || self.is_empty() { + return Ok(0); + } + + if buf.len() as u64 > self.len { + buf = &mut buf[..self.len as usize]; + } + + let n = self.reader.read(buf)?; + self.len -= n as u64; + + if n == 0 { + return Err(UnexpectedEof.into()); + } + + if self.is_empty() { + self.finish()?; + } + + Ok(n) + } +} + +impl FileReader<'_, '_> { + /// We've reached semantic EOF, consume and verify the padding and terminating TOK_PAR. + /// Files are padded to 64 bits (8 bytes), just like any other byte string in the wire format. + fn finish(&mut self) -> io::Result<()> { + let pad = (self.pad & 7) as usize; + + if pad != 0 { + let mut buf = [0; 8]; + self.reader.read_exact(&mut buf[pad..])?; + + if buf != [0; 8] { + return Err(InvalidData.into()); + } + } + + read::token(self.reader, &wire::TOK_PAR) + } +} + +/// A directory iterator, yielding a sequence of [Node]s. +/// It must be fully consumed before reading further from the [DirReader] that produced it, if any. +pub struct DirReader<'a, 'r> { + reader: &'a mut Reader<'r>, + /// Previous directory entry name. + /// We have to hang onto this to enforce name monotonicity. + prev_name: Option<Vec<u8>>, +} + +pub struct Entry<'a, 'r> { + pub name: Vec<u8>, + pub node: Node<'a, 'r>, +} + +impl<'a, 'r> DirReader<'a, 'r> { + fn new(reader: &'a mut Reader<'r>) -> Self { + Self { + reader, + prev_name: None, + } + } + + /// Read the next [Entry] from the directory. + /// + /// We explicitly don't implement [Iterator], since treating this as + /// a regular Rust iterator will surely lead you astray. + /// + /// * You must always consume the entire iterator, unless you abandon the entire archive reader. + /// * You must abandon the entire archive reader on the first error. + /// * You must abandon the directory reader upon the first [None]. + /// * Even if you know the amount of elements up front, you must keep reading until you encounter [None]. + /// + /// TODO(edef): enforce these in `#[cfg(debug_assertions)]` + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> io::Result<Option<Entry>> { + // COME FROM the previous iteration: if we've already read an entry, + // read its terminating TOK_PAR here. + if self.prev_name.is_some() { + read::token(self.reader, &wire::TOK_PAR)?; + } + + // Determine if there are more entries to follow + if let wire::Entry::None = read::tag(self.reader)? { + // We've reached the end of this directory. + return Ok(None); + } + + let name = read::bytes(self.reader, wire::MAX_NAME_LEN)?; + + if name.is_empty() + || name.contains(&0) + || name.contains(&b'/') + || name == b"." + || name == b".." + { + return Err(InvalidData.into()); + } + + // Enforce strict monotonicity of directory entry names. 
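+ // E.g. the sequence "a", "ab", "b" is valid, while a repeated name + // ("a", "a") or a descending one ("b", "a") is rejected.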
+ match &mut self.prev_name { + None => { + self.prev_name = Some(name.clone()); + } + Some(prev_name) => { + if *prev_name >= name { + return Err(InvalidData.into()); + } + + name[..].clone_into(prev_name); + } + } + + read::token(self.reader, &wire::TOK_NOD)?; + + Ok(Some(Entry { + name, + node: Node::new(&mut self.reader)?, + })) + } +} diff --git a/tvix/nix-compat/src/nar/reader/read.rs b/tvix/nix-compat/src/nar/reader/read.rs new file mode 100644 index 000000000000..1ce161376424 --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/read.rs @@ -0,0 +1,109 @@ +//! Helpers for reading [crate::nar::wire] format. + +use std::io::{ + self, + ErrorKind::{Interrupted, InvalidData, UnexpectedEof}, +}; + +use super::Reader; +use crate::nar::wire::Tag; + +/// Consume a little-endian [prim@u64] from the reader. +pub fn u64(reader: &mut Reader) -> io::Result<u64> { + let mut buf = [0; 8]; + reader.read_exact(&mut buf)?; + Ok(u64::from_le_bytes(buf)) +} + +/// Consume a byte string of up to `max_len` bytes from the reader. +pub fn bytes(reader: &mut Reader, max_len: usize) -> io::Result<Vec<u8>> { + assert!(max_len <= isize::MAX as usize); + + // read the length, and reject excessively large values + let len = self::u64(reader)?; + if len > max_len as u64 { + return Err(InvalidData.into()); + } + // we know the length fits in a usize now + let len = len as usize; + + // read the data and padding into a buffer + let buf_len = (len + 7) & !7; + let mut buf = vec![0; buf_len]; + reader.read_exact(&mut buf)?; + + // verify that the padding is all zeroes + for b in buf.drain(len..) { + if b != 0 { + return Err(InvalidData.into()); + } + } + + Ok(buf) +} + +/// Consume a known token from the reader. +pub fn token<const N: usize>(reader: &mut Reader, token: &[u8; N]) -> io::Result<()> { + let mut buf = [0u8; N]; + + // This implements something similar to [Read::read_exact], but verifies that + // the input data matches the token while we read it. These two slices respectively + // represent the remaining token to be verified, and the remaining input buffer. + let mut token = &token[..]; + let mut buf = &mut buf[..]; + + while !token.is_empty() { + match reader.read(buf) { + Ok(0) => { + return Err(UnexpectedEof.into()); + } + Ok(n) => { + let (t, b); + (t, token) = token.split_at(n); + (b, buf) = buf.split_at_mut(n); + + if t != b { + return Err(InvalidData.into()); + } + } + Err(e) => { + if e.kind() != Interrupted { + return Err(e); + } + } + } + } + + Ok(()) +} + +/// Consume a [Tag] from the reader. 
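+/// +/// A minimal sketch, decoding one of the [crate::nar::wire::Node] tokens +/// (here, [crate::nar::wire::TOK_SYM]) from an in-memory buffer: +/// +/// ```ignore +/// let mut reader: &[u8] = &wire::TOK_SYM[..]; +/// let node: wire::Node = tag(&mut reader)?; +/// assert_eq!(wire::Node::Sym, node); +/// ```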
+pub fn tag<T: Tag>(reader: &mut Reader) -> io::Result<T> { + let mut buf = T::make_buf(); + let buf = buf.as_mut(); + + // first read the known minimum length… + reader.read_exact(&mut buf[..T::MIN])?; + + // then decide which tag we're expecting + let tag = T::from_u8(buf[T::OFF]).ok_or(InvalidData)?; + let (head, tail) = tag.as_bytes().split_at(T::MIN); + + // make sure what we've read so far is valid + if buf[..T::MIN] != *head { + return Err(InvalidData.into()); + } + + // …then read the rest, if any + if !tail.is_empty() { + let rest = tail.len(); + reader.read_exact(&mut buf[..rest])?; + + // and make sure it's what we expect + if buf[..rest] != *tail { + return Err(InvalidData.into()); + } + } + + Ok(tag) +} diff --git a/tvix/nix-compat/src/nar/reader/test.rs b/tvix/nix-compat/src/nar/reader/test.rs new file mode 100644 index 000000000000..fd0d6a9f5afd --- /dev/null +++ b/tvix/nix-compat/src/nar/reader/test.rs @@ -0,0 +1,120 @@ +use std::io::Read; + +use crate::nar; + +#[test] +fn symlink() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/symlink.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Symlink { target } => { + assert_eq!( + &b"/nix/store/somewhereelse"[..], + &target, + "target must match" + ); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn file() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/helloworld.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + let mut buf = vec![]; + reader.read_to_end(&mut buf).expect("read must succeed"); + assert_eq!(&b"Hello World!"[..], &buf); + } + _ => panic!("unexpected type"), + } +} + +#[test] +fn complicated() { + let mut f = std::io::Cursor::new(include_bytes!("../tests/complicated.nar")); + let node = nar::reader::open(&mut f).unwrap(); + + match node { + nar::reader::Node::Directory(mut dir_reader) => { + // first entry is .keep, an empty regular file. + let entry = dir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + assert_eq!(&b".keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).unwrap(), 0); + } + _ => panic!("unexpected type for .keep"), + } + + // second entry is aa, a symlink to /nix/store/somewhereelse + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(&b"aa"[..], &entry.name); + + match entry.node { + nar::reader::Node::Symlink { target } => { + assert_eq!(&b"/nix/store/somewhereelse"[..], &target); + } + _ => panic!("unexpected type for aa"), + } + + // third entry is a directory called "keep" + let entry = dir_reader + .next() + .expect("next must be some") + .expect("must be some"); + + assert_eq!(&b"keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::Directory(mut subdir_reader) => { + // first entry is .keep, an empty regular file. + let entry = subdir_reader + .next() + .expect("next must succeed") + .expect("must be some"); + + // … it contains a single .keep, an empty regular file. 
+ assert_eq!(&b".keep"[..], &entry.name); + + match entry.node { + nar::reader::Node::File { + executable, + mut reader, + } => { + assert!(!executable); + assert_eq!(reader.read(&mut [0]).unwrap(), 0); + } + _ => panic!("unexpected type for keep/.keep"), + } + } + _ => panic!("unexpected type for keep"), + } + + // reading more entries yields None (and we must actually keep reading until we see it) + assert!(dir_reader.next().expect("must succeed").is_none()); + } + _ => panic!("unexpected type"), + } +} diff --git a/tvix/nix-compat/src/nar/tests/complicated.nar b/tvix/nix-compat/src/nar/tests/complicated.nar new file mode 100644 index 000000000000..6a137f5fbb6b --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/complicated.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/helloworld.nar b/tvix/nix-compat/src/nar/tests/helloworld.nar new file mode 100644 index 000000000000..2e1268115205 --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/helloworld.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/tests/symlink.nar b/tvix/nix-compat/src/nar/tests/symlink.nar new file mode 100644 index 000000000000..7990e4ad5bc2 --- /dev/null +++ b/tvix/nix-compat/src/nar/tests/symlink.nar Binary files differdiff --git a/tvix/nix-compat/src/nar/wire/mod.rs b/tvix/nix-compat/src/nar/wire/mod.rs new file mode 100644 index 000000000000..b9e021249543 --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/mod.rs @@ -0,0 +1,133 @@ +//! NAR wire format, without I/O details, since those differ between +//! the synchronous and asynchronous implementations. +//! +//! The wire format is an S-expression format, encoded onto the wire +//! using simple encoding rules. +//! +//! # Encoding +//! +//! Lengths are represented as 64-bit unsigned integers in little-endian +//! format. Byte strings, including file contents and the syntactic strings +//! that are part of the grammar, are prefixed by their 64-bit length, and padded +//! to 8-byte (64-bit) alignment with zero bytes. The zero-length string +//! is therefore encoded as eight zero bytes representing its length. +//! +//! # Grammar +//! +//! The NAR grammar is as follows: +//! ```plain +//! archive ::= "nix-archive-1" node +//! +//! node ::= "(" "type" "symlink" "target" string ")" +//! ||= "(" "type" "regular" ("executable" "")? "contents" string ")" +//! ||= "(" "type" "directory" entry* ")" +//! +//! entry ::= "entry" "(" "name" string "node" node ")" +//! ``` +//! +//! We rewrite it to pull together the purely syntactic elements into +//! unified tokens, producing an equivalent grammar that can be parsed +//! and serialized more elegantly: +//! ```plain +//! archive ::= TOK_NAR node +//! node ::= TOK_SYM string TOK_PAR +//! ||= (TOK_REG | TOK_EXE) string TOK_PAR +//! ||= TOK_DIR entry* TOK_PAR +//! +//! entry ::= TOK_ENT string TOK_NOD node TOK_PAR +//! +//! TOK_NAR ::= "nix-archive-1" "(" "type" +//! TOK_SYM ::= "symlink" "target" +//! TOK_REG ::= "regular" "contents" +//! TOK_EXE ::= "regular" "executable" "" +//! TOK_DIR ::= "directory" +//! TOK_ENT ::= "entry" "(" "name" +//! TOK_NOD ::= "node" "(" "type" +//! TOK_PAR ::= ")" +//! ``` +//! +//! # Restrictions +//! +//! NOTE: These restrictions are not (and cannot be) enforced by this module, +//! but must be enforced by its consumers, [super::reader] and [super::writer]. +//! +//! Directory entry names cannot be the reserved names `.` or `..`, nor contain +//! forward slashes. They must appear in strictly ascending lexicographic order +//! within a directory, and can be at most [MAX_NAME_LEN] bytes in length.
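+//! For example, the entry names `a`, `ab`, `b` are correctly ordered, while a +//! duplicated or descending name must be rejected.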
+//! +//! Symlink targets can be at most [MAX_TARGET_LEN] bytes in length. +//! +//! Neither is permitted to be empty, or contain null bytes. + +// These values are the standard Linux length limits +/// Maximum length of a directory entry name +pub const MAX_NAME_LEN: usize = 255; +/// Maximum length of a symlink target +pub const MAX_TARGET_LEN: usize = 4095; + +#[cfg(test)] +fn token(xs: &[&str]) -> Vec<u8> { + let mut out = vec![]; + for x in xs { + let len = x.len() as u64; + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(x.as_bytes()); + + let n = x.len() & 7; + if n != 0 { + const ZERO: [u8; 8] = [0; 8]; + out.extend_from_slice(&ZERO[n..]); + } + } + out +} + +pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; +pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; +pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; +pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[test] +fn tokens() { + let cases: &[(&[u8], &[&str])] = &[ + (&TOK_NAR, &["nix-archive-1", "(", "type"]), + (&TOK_SYM, &["symlink", "target"]), + (&TOK_REG, &["regular", "contents"]), + (&TOK_EXE, &["regular", "executable", "", "contents"]), + (&TOK_DIR, &["directory"]), + (&TOK_ENT, &["entry", "(", "name"]), + (&TOK_NOD, &["node", "(", "type"]), + (&TOK_PAR, &[")"]), + ]; + + for &(tok, xs) in cases { + assert_eq!(tok, token(xs)); + } +} + +pub use tag::Tag; +mod tag; + +tag::make! { + /// These are the node tokens, succeeding [TOK_NAR] or [TOK_NOD], + /// and preceding the next variable-length element. + pub enum Node[16] { + Sym = TOK_SYM, + Reg = TOK_REG, + Exe = TOK_EXE, + Dir = TOK_DIR, + } + + /// Directory entry or terminator + pub enum Entry[0] { + /// End of directory + None = TOK_PAR, + /// Directory entry + /// Followed by a name string, [TOK_NOD], and a [Node]. + Some = TOK_ENT, + } +} diff --git a/tvix/nix-compat/src/nar/wire/tag.rs b/tvix/nix-compat/src/nar/wire/tag.rs new file mode 100644 index 000000000000..55b93f998541 --- /dev/null +++ b/tvix/nix-compat/src/nar/wire/tag.rs @@ -0,0 +1,165 @@ +/// A type implementing Tag represents a static hash set of byte strings, +/// with a very simple perfect hash function: every element has a unique +/// discriminant at a common byte offset. The values of the type represent +/// the members by this single discriminant byte; they are indices into the +/// hash set. +pub trait Tag: Sized { + /// Discriminant offset + const OFF: usize; + /// Minimum variant length + const MIN: usize; + + /// Minimal suitably sized buffer for reading the wire representation + /// HACK: This is a workaround for const generics limitations. 
+ type Buf: AsMut<[u8]> + Send; + + /// Make an instance of [Self::Buf] + fn make_buf() -> Self::Buf; + + /// Convert a discriminant into the corresponding variant + fn from_u8(x: u8) -> Option<Self>; + + /// Convert a variant back into the wire representation + fn as_bytes(&self) -> &'static [u8]; +} + +/// Generate an enum implementing [Tag], enforcing at compile time that +/// the discriminant values are distinct. +macro_rules! make { + ( + $( + $(#[doc = $doc:expr])* + $vis:vis enum $Enum:ident[$off:expr] { + $( + $(#[doc = $var_doc:expr])* + $Var:ident = $TOK:ident, + )+ + } + )* + ) => { + $( + $(#[doc = $doc])* + #[derive(Debug, PartialEq, Eq)] + #[repr(u8)] + $vis enum $Enum { + $( + $(#[doc = $var_doc])* + $Var = $TOK[$Enum::OFF] + ),+ + } + + impl Tag for $Enum { + /// Discriminant offset + const OFF: usize = $off; + /// Minimum variant length + const MIN: usize = tag::min_of(&[$($TOK.len()),+]); + + /// Minimal suitably sized buffer for reading the wire representation + type Buf = [u8; tag::buf_of(&[$($TOK.len()),+])]; + + /// Make an instance of [Self::Buf] + #[inline(always)] + fn make_buf() -> Self::Buf { + [0u8; tag::buf_of(&[$($TOK.len()),+])] + } + + /// Convert a discriminant into the corresponding variant + #[inline(always)] + fn from_u8(x: u8) -> Option<Self> { + #[allow(non_upper_case_globals)] + mod __variant { + $( + pub const $Var: u8 = super::$Enum::$Var as u8; + )+ + } + + match x { + $(__variant::$Var => Some(Self::$Var),)+ + _ => None + } + } + + /// Convert a variant back into the wire representation + #[inline(always)] + fn as_bytes(&self) -> &'static [u8] { + match self { + $(Self::$Var => &$TOK,)+ + } + } + } + )* + }; +} + +// The following functions are written somewhat unusually, +// since they're const functions that cannot use iterators. + +/// Maximum element of a slice +const fn max_of(mut xs: &[usize]) -> usize { + let mut y = usize::MIN; + while let &[x, ref tail @ ..] = xs { + y = if x > y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum element of a slice +pub const fn min_of(mut xs: &[usize]) -> usize { + let mut y = usize::MAX; + while let &[x, ref tail @ ..] = xs { + y = if x < y { x } else { y }; + xs = tail; + } + y +} + +/// Minimum buffer size to contain either of `0..Tag::MIN` and `Tag::MIN..` +/// at a particular time, for all possible tag wire representations, given +/// the sizes of all wire representations. +/// +/// # Example +/// +/// ```plain +/// OFF = 16 +/// MIN = 24 +/// MAX = 64 +/// +/// BUF = max(MIN, MAX-MIN) +/// = max(24, 64-24) +/// = max(24, 40) +/// = 40 +/// ``` +pub const fn buf_of(xs: &[usize]) -> usize { + max_of(&[min_of(xs), max_of(xs) - min_of(xs)]) +} + +pub(crate) use make; + +#[cfg(test)] +mod test { + use super::super::tag::{self, Tag}; + + const TOK_A: [u8; 3] = [0xed, 0xef, 0x1c]; + const TOK_B: [u8; 3] = [0xed, 0xf0, 0x1c]; + + const OFFSET: usize = 1; + + make! { + enum Token[OFFSET] { + A = TOK_A, + B = TOK_B, + } + } + + #[test] + fn example() { + assert_eq!(Token::from_u8(0xed), None); + + let tag = Token::from_u8(0xef).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_A[..]); + + let tag = Token::from_u8(0xf0).unwrap(); + assert_eq!(tag.as_bytes(), &TOK_B[..]); + } +} diff --git a/tvix/nix-compat/src/nar/writer/async.rs b/tvix/nix-compat/src/nar/writer/async.rs new file mode 100644 index 000000000000..11aefab9cba7 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/async.rs @@ -0,0 +1,235 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! 
NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`AsyncWrite`], and content +//! can be read from any type that implements [`AsyncBufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # futures::executor::block_on(async { +//! # use futures::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::r#async::open(&mut sink).await?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `AsyncBufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader).await?; +//! # Ok::<(), std::io::Error>(()) +//! # }); +//! ``` + +use crate::nar::wire; +use futures_util::{AsyncBufRead, AsyncBufReadExt, AsyncWrite, AsyncWriteExt}; +use std::{ + io::{ + self, + ErrorKind::{InvalidInput, UnexpectedEof}, + }, + pin::Pin, +}; + +/// Convenience type alias for types implementing [`AsyncWrite`]. +pub type Writer<'a> = dyn AsyncWrite + Unpin + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub async fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR).await?; + Ok(node) } + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + async fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data).await + } + + async fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]).await, + } + } + + /// Make this node a symlink. + pub async fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!(!target.is_empty(), "target is empty"); + debug_assert!(!target.contains(&0), "target contains null byte"); + + self.write(&wire::TOK_SYM).await?; + self.write(&target.len().to_le_bytes()).await?; + self.write(target).await?; + self.pad(target.len() as u64).await?; + self.write(&wire::TOK_PAR).await?; + Ok(()) + } + + /// Make this node a single file. + pub async fn file( + mut self, + executable: bool, + size: u64, + reader: &mut (dyn AsyncBufRead + Unpin + Send), + ) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + }) + .await?; + + self.write(&size.to_le_bytes()).await?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf().await?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n]).await?; + + need -= n as u64; + Pin::new(&mut *reader).consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().await.map(|b| !b.is_empty())? 
{ + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size).await?; + self.write(&wire::TOK_PAR).await?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub async fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR).await?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. + pub async fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR).await?; + } + } + + self.node.write(&wire::TOK_ENT).await?; + self.node.write(&name.len().to_le_bytes()).await?; + self.node.write(name).await?; + self.node.pad(name.len() as u64).await?; + self.node.write(&wire::TOK_NOD).await?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. + pub async fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR).await?; + } + + self.node.write(&wire::TOK_PAR).await?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/mod.rs b/tvix/nix-compat/src/nar/writer/mod.rs new file mode 100644 index 000000000000..fe8ccccb3787 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/mod.rs @@ -0,0 +1,9 @@ +pub use sync::*; + +pub mod sync; + +#[cfg(test)] +mod test; + +#[cfg(feature = "async")] +pub mod r#async; diff --git a/tvix/nix-compat/src/nar/writer/sync.rs b/tvix/nix-compat/src/nar/writer/sync.rs new file mode 100644 index 000000000000..6270129028fa --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/sync.rs @@ -0,0 +1,224 @@ +//! 
Implements an interface for writing the Nix archive format (NAR).
+//!
+//! NAR files (and their hashed representations) are used in C++ Nix for
+//! addressing fixed-output derivations and a variety of other things.
+//!
+//! NAR files can be output to any type that implements [`Write`], and content
+//! can be read from any type that implements [`BufRead`].
+//!
+//! Writing a single file might look like this:
+//!
+//! ```rust
+//! # use std::io::BufReader;
+//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4];
+//!
+//! // Output location to write the NAR to.
+//! let mut sink: Vec<u8> = Vec::new();
+//!
+//! // Instantiate writer for this output location.
+//! let mut nar = nix_compat::nar::writer::open(&mut sink)?;
+//!
+//! // Acquire metadata for the single file to output, and pass it in a
+//! // `BufRead`-implementing type.
+//!
+//! let executable = false;
+//! let size = some_file.len() as u64;
+//! let mut reader = BufReader::new(some_file.as_slice());
+//! nar.file(executable, size, &mut reader)?;
+//! # Ok::<(), std::io::Error>(())
+//! ```
+
+use crate::nar::wire;
+use std::io::{
+ self, BufRead,
+ ErrorKind::{InvalidInput, UnexpectedEof},
+ Write,
+};
+
+/// Convenience type alias for types implementing [`Write`].
+pub type Writer<'a> = dyn Write + Send + 'a;
+
+/// Create a new NAR, writing the output to the specified writer.
+pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> {
+ let mut node = Node { writer };
+ node.write(&wire::TOK_NAR)?;
+ Ok(node)
+}
+
+/// Single node in a NAR file.
+///
+/// A NAR can be thought of as a tree of nodes represented by this type. Each
+/// node can be a file, a symlink or a directory containing other nodes.
+pub struct Node<'a, 'w: 'a> {
+ writer: &'a mut Writer<'w>,
+}
+
+impl<'a, 'w> Node<'a, 'w> {
+ fn write(&mut self, data: &[u8]) -> io::Result<()> {
+ self.writer.write_all(data)
+ }
+
+ fn pad(&mut self, n: u64) -> io::Result<()> {
+ match (n & 7) as usize {
+ 0 => Ok(()),
+ n => self.write(&[0; 8][n..]),
+ }
+ }
+
+ /// Make this node a symlink.
+ pub fn symlink(mut self, target: &[u8]) -> io::Result<()> {
+ debug_assert!(
+ target.len() <= wire::MAX_TARGET_LEN,
+ "target.len() > {}",
+ wire::MAX_TARGET_LEN
+ );
+ debug_assert!(!target.is_empty(), "target is empty");
+ debug_assert!(!target.contains(&0), "target contains null byte");
+
+ self.write(&wire::TOK_SYM)?;
+ self.write(&target.len().to_le_bytes())?;
+ self.write(target)?;
+ self.pad(target.len() as u64)?;
+ self.write(&wire::TOK_PAR)?;
+ Ok(())
+ }
+
+ /// Make this node a single file.
+ pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> {
+ self.write(if executable {
+ &wire::TOK_EXE
+ } else {
+ &wire::TOK_REG
+ })?;
+
+ self.write(&size.to_le_bytes())?;
+
+ let mut need = size;
+ while need != 0 {
+ let data = reader.fill_buf()?;
+
+ if data.is_empty() {
+ return Err(UnexpectedEof.into());
+ }
+
+ let n = need.min(data.len() as u64) as usize;
+ self.write(&data[..n])?;
+
+ need -= n as u64;
+ reader.consume(n);
+ }
+
+ // bail if there's still data left in the passed reader.
+ // This uses the same code as [BufRead::has_data_left] (unstable).
+ if reader.fill_buf().map(|b| !b.is_empty())? {
+ return Err(io::Error::new(
+ InvalidInput,
+ "reader contained more data than specified size",
+ ));
+ }
+
+ self.pad(size)?;
+ self.write(&wire::TOK_PAR)?;
+
+ Ok(())
+ }
+
+ /// Make this node a directory, the content of which is set using the
+ /// resulting [`Directory`] value.
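+ ///
+ /// For example, a minimal sketch of writing a directory containing one
+ /// empty file (mirroring the tests in this crate):
+ ///
+ /// ```rust
+ /// # use std::io::Cursor;
+ /// let mut sink: Vec<u8> = Vec::new();
+ /// let node = nix_compat::nar::writer::open(&mut sink)?;
+ /// let mut dir = node.directory()?;
+ /// dir.entry(b".keep")?.file(false, 0, &mut Cursor::new([]))?;
+ /// dir.close()?;
+ /// # Ok::<(), std::io::Error>(())
+ /// ```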
+ /// + /// It is the caller's responsibility to invoke [`Directory::close`], + /// or invalid archives will be produced silently. + pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR)?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + /// + /// It is the caller's responsibility to ensure that directory entries are + /// written in order of ascending name. If this is not ensured, this method + /// may panic or silently produce invalid archives. + pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(!name.is_empty(), "name is empty"); + debug_assert!(!name.contains(&0), "name contains null byte"); + debug_assert!(!name.contains(&b'/'), "name contains {:?}", '/'); + debug_assert!(name != b".", "name == {:?}", "."); + debug_assert!(name != b"..", "name == {:?}", ".."); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + use bstr::ByteSlice; + assert!( + &**_prev_name < name, + "misordered names: {:?} >= {:?}", + _prev_name.as_bstr(), + name.as_bstr() + ); + name.clone_into(_prev_name); + } + self.node.write(&wire::TOK_PAR)?; + } + } + + self.node.write(&wire::TOK_ENT)?; + self.node.write(&name.len().to_le_bytes())?; + self.node.write(name)?; + self.node.pad(name.len() as u64)?; + self.node.write(&wire::TOK_NOD)?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. 
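+ ///
+ /// A directory with at least one entry is closed with two terminator
+ /// tokens: one ending the final entry, one ending the directory itself.
+ /// An empty directory only needs the latter, which is why `prev_name`
+ /// is checked below.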
+ pub fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR)?; + } + + self.node.write(&wire::TOK_PAR)?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/test.rs b/tvix/nix-compat/src/nar/writer/test.rs new file mode 100644 index 000000000000..7b1dd1a2a96e --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/test.rs @@ -0,0 +1,133 @@ +use crate::nar; + +#[test] +fn symlink() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + node.symlink("/nix/store/somewhereelse".as_bytes()).unwrap(); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[test] +fn symlink_async() { + let mut buf = vec![]; + + futures::executor::block_on(async { + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + node.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .unwrap(); + }); + + assert_eq!(include_bytes!("../tests/symlink.nar"), buf.as_slice()); +} + +#[test] +fn file() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut std::io::Cursor::new(file_contents), + ) + .unwrap(); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[test] +fn file_async() { + let mut buf = vec![]; + + futures::executor::block_on(async { + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let file_contents = "Hello World!".to_string(); + node.file( + false, + file_contents.len() as u64, + &mut futures::io::Cursor::new(file_contents), + ) + .await + .unwrap(); + }); + + assert_eq!(include_bytes!("../tests/helloworld.nar"), buf.as_slice()); +} + +#[test] +fn complicated() { + let mut buf = vec![]; + let node = nar::writer::open(&mut buf).unwrap(); + + let mut dir_node = node.directory().unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).unwrap(); + e.file(false, 0, &mut std::io::Cursor::new([])) + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).unwrap(); + let mut subdir_node = e.directory().expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .expect("subdir entry must succeed"); + e_sub.file(false, 0, &mut std::io::Cursor::new([])).unwrap(); + + // close the subdir, and then the dir, which is required. 
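+ // (Neither Directory type has a Drop guard: forgetting close() would
+ // silently produce a truncated, invalid NAR.)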
+ subdir_node.close().unwrap(); + dir_node.close().unwrap(); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} + +#[cfg(feature = "async")] +#[test] +fn complicated_async() { + let mut buf = vec![]; + + futures::executor::block_on(async { + let node = nar::writer::r#async::open(&mut buf).await.unwrap(); + + let mut dir_node = node.directory().await.unwrap(); + + let e = dir_node.entry(".keep".as_bytes()).await.unwrap(); + e.file(false, 0, &mut futures::io::Cursor::new([])) + .await + .expect("read .keep must succeed"); + + let e = dir_node.entry("aa".as_bytes()).await.unwrap(); + e.symlink("/nix/store/somewhereelse".as_bytes()) + .await + .expect("symlink must succeed"); + + let e = dir_node.entry("keep".as_bytes()).await.unwrap(); + let mut subdir_node = e.directory().await.expect("directory must succeed"); + + let e_sub = subdir_node + .entry(".keep".as_bytes()) + .await + .expect("subdir entry must succeed"); + e_sub + .file(false, 0, &mut futures::io::Cursor::new([])) + .await + .unwrap(); + + // close the subdir, and then the dir, which is required. + subdir_node.close().await.unwrap(); + dir_node.close().await.unwrap(); + }); + + assert_eq!(include_bytes!("../tests/complicated.nar"), buf.as_slice()); +} diff --git a/tvix/nix-compat/src/narinfo/fingerprint.rs b/tvix/nix-compat/src/narinfo/fingerprint.rs new file mode 100644 index 000000000000..3e02aca57160 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/fingerprint.rs @@ -0,0 +1,50 @@ +use crate::{nixbase32, store_path::StorePathRef}; + +/// Computes the fingerprint string for certain fields in a [super::NarInfo]. +/// This fingerprint is signed by an ed25519 key, and in the case of a Nix HTTP +/// Binary cache, included in the NARInfo files served from there. +pub fn fingerprint<'a, R: Iterator<Item = &'a StorePathRef<'a>>>( + store_path: &StorePathRef, + nar_sha256: &[u8; 32], + nar_size: u64, + references: R, +) -> String { + format!( + "1;{};sha256:{};{};{}", + store_path.to_absolute_path(), + nixbase32::encode(nar_sha256), + nar_size, + // references are absolute paths, joined with `,`. 
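+ // e.g. "/nix/store/aaaa...-foo,/nix/store/bbbb...-bar"
+ // (digests shortened here purely for illustration).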
+ references
+ .map(|r| r.to_absolute_path())
+ .collect::<Vec<String>>()
+ .join(",")
+ )
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::narinfo::NarInfo;
+
+ const NARINFO_STR: &str = r#"StorePath: /nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin
+URL: nar/05ra3y72i3qjri7xskf9qj8kb29r6naqy1sqpbs3azi3xcigmj56.nar.xz
+Compression: xz
+FileHash: sha256:05ra3y72i3qjri7xskf9qj8kb29r6naqy1sqpbs3azi3xcigmj56
+FileSize: 68852
+NarHash: sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0
+NarSize: 196040
+References: 0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0 6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115 j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12 yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n
+Deriver: 5rwxzi7pal3qhpsyfc16gzkh939q1np6-curl-7.82.0.drv
+Sig: cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==
+Sig: test1:519iiVLx/c4Rdt5DNt6Y2Jm6hcWE9+XY69ygiWSZCNGVcmOcyL64uVAJ3cV8vaTusIZdbTnYo9Y7vDNeTmmMBQ==
+"#;
+
+ #[test]
+ fn fingerprint() {
+ let parsed = NarInfo::parse(NARINFO_STR).expect("must parse");
+ assert_eq!(
+ "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n",
+ parsed.fingerprint()
+ );
+ }
+} diff --git a/tvix/nix-compat/src/narinfo/mod.rs b/tvix/nix-compat/src/narinfo/mod.rs new file mode 100644 index 000000000000..b1c10bceb200 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/mod.rs @@ -0,0 +1,527 @@
+//! NAR info files describe a store path in a traditional Nix binary cache.
+//! Over the wire, they are formatted as "Key: value" pairs separated by newlines.
+//!
+//! They contain four kinds of information:
+//! 1. the description of the store path itself
+//! * store path prefix, digest, and name
+//! * NAR hash and size
+//! * references
+//! 2. authenticity information
+//! * zero or more signatures over that description
+//! * an optional [CAHash] for content-addressed paths (fixed outputs, sources, and derivations)
+//! 3. derivation metadata
+//! * deriver (the derivation that produced this path)
+//! * system (the system value of that derivation)
+//! 4. cache-specific information
+//! * URL of the compressed NAR, relative to the NAR info file
+//! * compression algorithm used for the NAR
+//!
* hash and size of the compressed NAR + +use bitflags::bitflags; +use data_encoding::HEXLOWER; +use std::{ + fmt::{self, Display}, + mem, +}; + +use crate::{nixbase32, nixhash::CAHash, store_path::StorePathRef}; + +mod fingerprint; +mod public_keys; +mod signature; + +pub use fingerprint::fingerprint; + +pub use public_keys::{Error as PubKeyError, PubKey}; +pub use signature::{Error as SignatureError, Signature}; + +#[derive(Debug)] +pub struct NarInfo<'a> { + pub flags: Flags, + // core (authenticated, but unverified here) + /// Store path described by this [NarInfo] + pub store_path: StorePathRef<'a>, + /// SHA-256 digest of the NAR file + pub nar_hash: [u8; 32], + /// Size of the NAR file in bytes + pub nar_size: u64, + /// Store paths known to be referenced by the contents + pub references: Vec<StorePathRef<'a>>, + // authenticity + /// Ed25519 signature over the path fingerprint + pub signatures: Vec<Signature<'a>>, + /// Content address (for content-defined paths) + pub ca: Option<CAHash>, + // derivation metadata + /// Nix system triple of [NarInfo::deriver] + pub system: Option<&'a str>, + /// Store path of the derivation that produced this. The last .drv suffix is stripped. + pub deriver: Option<StorePathRef<'a>>, + // cache-specific untrusted metadata + /// Relative URL of the compressed NAR file + pub url: &'a str, + /// Compression method of the NAR file + /// `None` means `Compression: none`. + /// + /// Nix interprets a missing `Compression` field as `Some("bzip2")`, + /// so we do as well. We haven't found any examples of this in the + /// wild, not even in the cache.nixos.org dataset. + pub compression: Option<&'a str>, + /// SHA-256 digest of the file at `url` + pub file_hash: Option<[u8; 32]>, + /// Size of the file at `url` in bytes + pub file_size: Option<u64>, +} + +bitflags! 
{ + /// TODO(edef): be conscious of these when roundtripping + #[derive(Debug, Copy, Clone)] + pub struct Flags: u8 { + const UNKNOWN_FIELD = 1 << 0; + const COMPRESSION_DEFAULT = 1 << 1; + // Format quirks encountered in the cache.nixos.org dataset + const REFERENCES_OUT_OF_ORDER = 1 << 2; + const NAR_HASH_HEX = 1 << 3; + } +} + +impl<'a> NarInfo<'a> { + pub fn parse(input: &'a str) -> Result<Self, Error> { + let mut flags = Flags::empty(); + let mut store_path = None; + let mut url = None; + let mut compression = None; + let mut file_hash = None; + let mut file_size = None; + let mut nar_hash = None; + let mut nar_size = None; + let mut references = None; + let mut system = None; + let mut deriver = None; + let mut signatures = vec![]; + let mut ca = None; + + for line in input.lines() { + let (tag, val) = line + .split_once(':') + .ok_or_else(|| Error::InvalidLine(line.to_string()))?; + + let val = val + .strip_prefix(' ') + .ok_or_else(|| Error::InvalidLine(line.to_string()))?; + + match tag { + "StorePath" => { + let val = val + .strip_prefix("/nix/store/") + .ok_or(Error::InvalidStorePath( + crate::store_path::Error::MissingStoreDir, + ))?; + let val = StorePathRef::from_bytes(val.as_bytes()) + .map_err(Error::InvalidStorePath)?; + + if store_path.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "URL" => { + if val.is_empty() { + return Err(Error::EmptyField(tag.to_string())); + } + + if url.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "Compression" => { + if val.is_empty() { + return Err(Error::EmptyField(tag.to_string())); + } + + if compression.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "FileHash" => { + let val = val + .strip_prefix("sha256:") + .ok_or_else(|| Error::MissingPrefixForHash(tag.to_string()))?; + let val = nixbase32::decode_fixed::<32>(val) + .map_err(|e| Error::UnableToDecodeHash(tag.to_string(), e))?; + + if file_hash.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "FileSize" => { + let val = val + .parse::<u64>() + .map_err(|_| Error::UnableToParseSize(tag.to_string(), val.to_string()))?; + + if file_size.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "NarHash" => { + let val = val + .strip_prefix("sha256:") + .ok_or_else(|| Error::MissingPrefixForHash(tag.to_string()))?; + + let val = if val.len() != HEXLOWER.encode_len(32) { + nixbase32::decode_fixed::<32>(val) + } else { + flags |= Flags::NAR_HASH_HEX; + + let val = val.as_bytes(); + let mut buf = [0u8; 32]; + + HEXLOWER + .decode_mut(val, &mut buf) + .map_err(|e| e.error) + .map(|_| buf) + }; + + let val = val.map_err(|e| Error::UnableToDecodeHash(tag.to_string(), e))?; + + if nar_hash.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "NarSize" => { + let val = val + .parse::<u64>() + .map_err(|_| Error::UnableToParseSize(tag.to_string(), val.to_string()))?; + + if nar_size.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "References" => { + let val: Vec<StorePathRef> = if !val.is_empty() { + let mut prev = ""; + val.split(' ') + .enumerate() + .map(|(i, s)| { + // TODO(edef): track *duplicates* if this occurs + if mem::replace(&mut prev, s) >= s { + flags |= Flags::REFERENCES_OUT_OF_ORDER; + } + + StorePathRef::from_bytes(s.as_bytes()) + .map_err(|err| Error::InvalidReference(i, err)) + }) + .collect::<Result<_, _>>()? 
+ } else { + vec![] + }; + + if references.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "System" => { + if val.is_empty() { + return Err(Error::EmptyField(tag.to_string())); + } + + if system.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + "Deriver" => { + match val.strip_suffix(".drv") { + Some(val) => { + let val = StorePathRef::from_bytes(val.as_bytes()) + .map_err(Error::InvalidDeriverStorePath)?; + + if deriver.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + None => { + return Err(Error::InvalidDeriverStorePathMissingSuffix); + } + }; + } + "Sig" => { + let val = Signature::parse(val) + .map_err(|e| Error::UnableToParseSignature(signatures.len(), e))?; + + signatures.push(val); + } + "CA" => { + let val = CAHash::from_nix_hex_str(val) + .ok_or_else(|| Error::UnableToParseCA(val.to_string()))?; + + if ca.replace(val).is_some() { + return Err(Error::DuplicateField(tag.to_string())); + } + } + _ => { + flags |= Flags::UNKNOWN_FIELD; + } + } + } + + Ok(NarInfo { + store_path: store_path.ok_or(Error::MissingField("StorePath"))?, + nar_hash: nar_hash.ok_or(Error::MissingField("NarHash"))?, + nar_size: nar_size.ok_or(Error::MissingField("NarSize"))?, + references: references.ok_or(Error::MissingField("References"))?, + signatures, + ca, + system, + deriver, + url: url.ok_or(Error::MissingField("URL"))?, + compression: match compression { + Some("none") => None, + None => { + flags |= Flags::COMPRESSION_DEFAULT; + Some("bzip2") + } + _ => compression, + }, + file_hash, + file_size, + flags, + }) + } + + /// Computes the fingerprint string for certain fields in this [NarInfo]. + /// This fingerprint is signed in [self.signatures]. 
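+ ///
+ /// A sketch of the verification flow this enables (assuming a parsed
+ /// `narinfo` and a `PubKey` named `key` from this crate's `public_keys`
+ /// module):
+ ///
+ /// ```rust,ignore
+ /// let fp = narinfo.fingerprint();
+ /// let trusted = narinfo.signatures.iter().any(|sig| key.verify(&fp, sig));
+ /// ```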
+ pub fn fingerprint(&self) -> String { + fingerprint( + &self.store_path, + &self.nar_hash, + self.nar_size, + self.references.iter(), + ) + } +} + +impl Display for NarInfo<'_> { + fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result { + writeln!(w, "StorePath: /nix/store/{}", self.store_path)?; + writeln!(w, "URL: {}", self.url)?; + + if let Some(compression) = self.compression { + writeln!(w, "Compression: {compression}")?; + } + + if let Some(file_hash) = self.file_hash { + writeln!(w, "FileHash: sha256:{}", nixbase32::encode(&file_hash),)?; + } + + if let Some(file_size) = self.file_size { + writeln!(w, "FileSize: {file_size}")?; + } + + writeln!(w, "NarHash: sha256:{}", nixbase32::encode(&self.nar_hash),)?; + writeln!(w, "NarSize: {}", self.nar_size)?; + + write!(w, "References:")?; + if self.references.is_empty() { + write!(w, " ")?; + } else { + for path in &self.references { + write!(w, " {path}")?; + } + } + writeln!(w)?; + + if let Some(deriver) = &self.deriver { + writeln!(w, "Deriver: {deriver}.drv")?; + } + + if let Some(system) = self.system { + writeln!(w, "System: {system}")?; + } + + for sig in &self.signatures { + writeln!(w, "Sig: {sig}")?; + } + + if let Some(ca) = &self.ca { + writeln!(w, "CA: {}", ca.to_nix_nixbase32_string())?; + } + + Ok(()) + } +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("duplicate field: {0}")] + DuplicateField(String), + + #[error("missing field: {0}")] + MissingField(&'static str), + + #[error("invalid line: {0}")] + InvalidLine(String), + + #[error("invalid StorePath: {0}")] + InvalidStorePath(crate::store_path::Error), + + #[error("field {0} may not be empty string")] + EmptyField(String), + + #[error("invalid {0}: {1}")] + UnableToParseSize(String, String), + + #[error("unable to parse #{0} reference: {1}")] + InvalidReference(usize, crate::store_path::Error), + + #[error("invalid Deriver store path: {0}")] + InvalidDeriverStorePath(crate::store_path::Error), + + #[error("invalid Deriver store path, must end with .drv")] + InvalidDeriverStorePathMissingSuffix, + + #[error("missing prefix for {0}")] + MissingPrefixForHash(String), + + #[error("unable to decode {0}: {1}")] + UnableToDecodeHash(String, data_encoding::DecodeError), + + #[error("unable to parse signature #{0}: {1}")] + UnableToParseSignature(usize, SignatureError), + + #[error("unable to parse CA field: {0}")] + UnableToParseCA(String), +} + +#[cfg(test)] +mod test { + use hex_literal::hex; + use lazy_static::lazy_static; + use pretty_assertions::assert_eq; + use std::{io, str}; + + use crate::{ + nixhash::{CAHash, NixHash}, + store_path::StorePathRef, + }; + + use super::{Flags, NarInfo}; + + lazy_static! 
{ + static ref CASES: &'static [&'static str] = { + let data = zstd::decode_all(io::Cursor::new(include_bytes!( + "../../testdata/narinfo.zst" + ))) + .unwrap(); + let data = str::from_utf8(Vec::leak(data)).unwrap(); + Vec::leak( + data.split_inclusive("\n\n") + .map(|s| s.strip_suffix('\n').unwrap()) + .collect::<Vec<_>>(), + ) + }; + } + + #[test] + fn roundtrip() { + for &input in *CASES { + let parsed = NarInfo::parse(input).expect("should parse"); + let output = format!("{parsed}"); + assert_eq!(input, output, "should roundtrip"); + } + } + + #[test] + fn references_out_of_order() { + let parsed = NarInfo::parse( + r#"StorePath: /nix/store/xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6 +URL: nar/1hr09cgkyw1hcsfkv5qp5jlpmf2mqrkrqs3xj5zklq9c1h9544ff.nar.bz2 +Compression: bzip2 +FileHash: sha256:1hr09cgkyw1hcsfkv5qp5jlpmf2mqrkrqs3xj5zklq9c1h9544ff +FileSize: 4006 +NarHash: sha256:0ik9mpqxpd9hv325hdblj2nawqj5w7951qdyy8ikxgwr6fq7m11c +NarSize: 21264 +References: a8922c0h87iilxzzvwn2hmv8x210aqb9-glibc-2.7 7w2acjgalb0cm7b3bg8yswza4l7iil9y-binutils-2.18 mm631h09mj964hm9q04l5fd8vw12j1mm-bash-3.2-p39 nx2zs2qd6snfcpzw4a0jnh26z9m0yihz-gcc-3.4.6 xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6 +Deriver: 2dzpn70c1hawczwhg9aavqk18zp9zsva-gcc-3.4.6.drv +Sig: cache.nixos.org-1:o1DTsjCz0PofLJ216P2RBuSulI8BAb6zHxWE4N+tzlcELk5Uk/GO2SCxWTRN5wJutLZZ+cHTMdWqOHF88KGQDg== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::REFERENCES_OUT_OF_ORDER)); + assert_eq!( + vec![ + "a8922c0h87iilxzzvwn2hmv8x210aqb9-glibc-2.7", + "7w2acjgalb0cm7b3bg8yswza4l7iil9y-binutils-2.18", + "mm631h09mj964hm9q04l5fd8vw12j1mm-bash-3.2-p39", + "nx2zs2qd6snfcpzw4a0jnh26z9m0yihz-gcc-3.4.6", + "xi429w4ddvb1r77978hm7jfb2jsn559r-gcc-3.4.6" + ], + parsed + .references + .iter() + .map(StorePathRef::to_string) + .collect::<Vec<_>>(), + ); + } + + #[test] + fn ca_nar_hash_sha1() { + let parsed = NarInfo::parse( + r#"StorePath: /nix/store/k20pahypzvr49fy82cw5sx72hdfg3qcr-texlive-hyphenex-37354 +URL: nar/0i5biw0g01514llhfswxy6xfav8lxxdq1xg6ik7hgsqbpw0f06yi.nar.xz +Compression: xz +FileHash: sha256:0i5biw0g01514llhfswxy6xfav8lxxdq1xg6ik7hgsqbpw0f06yi +FileSize: 7120 +NarHash: sha256:0h1bm4sj1cnfkxgyhvgi8df1qavnnv94sd0v09wcrm971602shfg +NarSize: 22552 +References: +Sig: cache.nixos.org-1:u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw== +CA: fixed:r:sha1:1ak1ymbmsfx7z8kh09jzkr3a4dvkrfjw +"#).expect("should parse"); + + assert_eq!( + parsed.ca, + Some(CAHash::Nar(NixHash::Sha1(hex!( + "5cba3c77236ae4f9650270a27fbad375551fa60a" + )))) + ); + } + + #[test] + fn compression_default() { + // This doesn't exist as such in cache.nixos.org. + // We explicitly removed the compression field for the sake of this test. 
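+ // Parsing should then fall back to Nix's default of "bzip2" and set
+ // the COMPRESSION_DEFAULT flag, which the assertions below verify.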
+ let parsed = NarInfo::parse(r#"StorePath: /nix/store/a1jjalr4csx9hcga7fnm122aqabrjnch-digikam-2.6.0 +URL: nar/1fzimfnvq2k8b40n4g54abmncpx2ddckh6qlb77pgq6xiysyil69.nar.bz2 +FileHash: sha256:1fzimfnvq2k8b40n4g54abmncpx2ddckh6qlb77pgq6xiysyil69 +FileSize: 43503778 +NarHash: sha256:0zpbbwipqzr5p8mlpag9wrsp5hlaxkq7gax5jj0hg3vvdziypcw5 +NarSize: 100658640 +References: 0izkyk7bq2ag9393nvnhgm87p75cq09w-liblqr-1-0.4.1 1cslpgyb7vb30inj3210jv6agqv42jxz-qca-2.0.3 1sya3bwjxkzpkmwn67gfzp4gz4g62l36-libXrandr-1.3.1 26yxdaa9z0ma5sgw02i670rsqnl57crs-glib-2.30.3 27lnjh99236kmhbpc5747599zcymfzmg-qt-4.8.2 2v6x378vcfvyxilkvihs60zha54z2x2y-qjson-0.7.1 45hgr3fbnr45n795hn2x7hsymp0h2j2m-libjpeg-8c 4kw1b212s80ap2iyibxrimcqb5imhfj7-libkexiv2-4.7.4 7dvylm5crlc0sfafcc0n46mb5ch67q0j-glibc-2.13 a05cbh1awjbl1rbyb2ynyf4k42v5a9a7-boost-1.47.0 a1jjalr4csx9hcga7fnm122aqabrjnch-digikam-2.6.0 aav5ffg8wlnilgnvdb2jnrv2aam4zmmz-perl-5.14.2 ab0m9h30nsr13w48qriv0k350kmwx567-kdelibs-4.7.4 avffkd49cqvpwdkzry8bn69dkbw4cy29-lensfun-0.2.5 cy8rl8h4yp2j3h8987vkklg328q3wmjz-gcc-4.6.3 dmmh5ihyg1r2dm4azgsfj2kprj92czlg-libSM-1.2.0 fl56j5n4shfw9c0r6vs2i4f1h9zx5kac-soprano-2.7.6 g15cmvh15ggdjcwapskngv20q4yhix40-jasper-1.900.1 i04maxd0din6v92rnqcwl9yra0kl2vk5-marble-4.7.4 kqjjb3m26rdddwwwkk8v45821aps877k-libICE-1.0.7 lxz9r135wkndvi642z4bjgmvyypsgirb-libtiff-3.9.4 m9c8i0a6cl30lcqp654dqkbag3wjmd00-libX11-1.4.1 mpnj4k2ijrgyfkh48fg96nzcmklfh5pl-coreutils-8.15 nppljblap477s0893c151lyq7r7n5v1q-zlib-1.2.7 nw9mdbyp8kyn3v4vkdzq0gsnqbc4mnx3-expat-2.0.1 p1a0dn931mzdkvj6h5yzshbmgxba5r0z-libgphoto2-2.4.11 pvjj07xa1cfkad3gwk376nzdrgknbcqm-mesa-7.11.2 pzcxag98jqccp9ycbxknyh0w95pgnsk4-lcms-1.19 qfi5pgds33kg6vlnxsmj0hyl74vcmyiz-libpng-1.5.10 scm6bj86s3qh3s3x0b9ayjp6755p4q86-mysql-5.1.54 sd23qspcyg385va0lr35xgz3hvlqphg6-libkipi-4.7.4 svmbrhc6kzfzakv20a7zrfl6kbr5mfpq-kdepimlibs-4.7.4 v7kh3h7xfwjz4hgffg3gwrfzjff9bw9d-bash-4.2-p24 vi17f22064djgpk0w248da348q8gxkww-libkdcraw-4.7.4 wkjdzmj3z4dcbsc9f833zs6krdgg2krk-phonon-4.6.0 xf3i3awqi0035ixy2qyb6hk4c92r3vrn-opencv-2.4.2 y1vr0nz8i59x59501020nh2k1dw3bhwq-libusb-0.1.12 yf3hin2hb6i08n7zrk8g3acy54rhg9bp-libXext-1.2.0 +Deriver: la77dr44phk5m5jnl4dvk01cwpykyw9s-digikam-2.6.0.drv +System: i686-linux +Sig: cache.nixos.org-1:92fl0i5q7EyegCj5Yf4L0bENkWuVAtgveiRcTEEUH0P6HvCE1xFcPbz/0Pf6Np+K1LPzHK+s5RHOmVoxRsvsDg== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::COMPRESSION_DEFAULT)); + assert_eq!(parsed.compression, Some("bzip2")); + } + + #[test] + fn nar_hash_hex() { + let parsed = NarInfo::parse(r#"StorePath: /nix/store/0vpqfxbkx0ffrnhbws6g9qwhmliksz7f-perl-HTTP-Cookies-6.01 +URL: nar/1rv1m9inydm1r4krw8hmwg1hs86d0nxddd1pbhihx7l7fycjvfk3.nar.xz +Compression: xz +FileHash: sha256:1rv1m9inydm1r4krw8hmwg1hs86d0nxddd1pbhihx7l7fycjvfk3 +FileSize: 19912 +NarHash: sha256:60adfd293a4d81ad7cd7e47263cbb3fc846309ef91b154a08ba672b558f94ff3 +NarSize: 45840 +References: 0vpqfxbkx0ffrnhbws6g9qwhmliksz7f-perl-HTTP-Cookies-6.01 9vrhbib2lxd9pjlg6fnl5b82gblidrcr-perl-HTTP-Message-6.06 wy20zslqxzxxfpzzk0rajh41d7a6mlnf-perl-HTTP-Date-6.02 +Deriver: fb4ihlq3psnsjq95mvvs49rwpplpc8zj-perl-HTTP-Cookies-6.01.drv +Sig: cache.nixos.org-1:HhaiY36Uk3XV1JGe9d9xHnzAapqJXprU1YZZzSzxE97jCuO5RR7vlG2kF7MSC5thwRyxAtdghdSz3AqFi+QSCw== +"#).expect("should parse"); + + assert!(parsed.flags.contains(Flags::NAR_HASH_HEX)); + assert_eq!( + hex!("60adfd293a4d81ad7cd7e47263cbb3fc846309ef91b154a08ba672b558f94ff3"), + parsed.nar_hash, + ); + } +} diff --git a/tvix/nix-compat/src/narinfo/public_keys.rs b/tvix/nix-compat/src/narinfo/public_keys.rs new 
file mode 100644 index 000000000000..ced05cadb133 --- /dev/null +++ b/tvix/nix-compat/src/narinfo/public_keys.rs @@ -0,0 +1,149 @@
+//! This module defines data structures and parsers for the public key format
+//! used inside Nix to verify signatures on .narinfo files.
+
+use std::fmt::Display;
+
+use data_encoding::BASE64;
+use ed25519_dalek::{VerifyingKey, PUBLIC_KEY_LENGTH};
+
+use super::Signature;
+
+/// This represents an ed25519 public key and its "name".
+/// These are normally passed in the `trusted-public-keys` Nix config option,
+/// and consist of a name and base64-encoded ed25519 pubkey, separated by a `:`.
+#[derive(Debug)]
+pub struct PubKey {
+ name: String,
+ verifying_key: VerifyingKey,
+}
+
+impl PubKey {
+ pub fn new(name: String, verifying_key: VerifyingKey) -> Self {
+ Self {
+ name,
+ verifying_key,
+ }
+ }
+
+ pub fn parse(input: &str) -> Result<Self, Error> {
+ let (name, bytes64) = input.split_once(':').ok_or(Error::MissingSeparator)?;
+
+ if name.is_empty()
+ || !name
+ .chars()
+ .all(|c| char::is_alphanumeric(c) || c == '-' || c == '.')
+ {
+ return Err(Error::InvalidName(name.to_string()));
+ }
+
+ if bytes64.len() != BASE64.encode_len(PUBLIC_KEY_LENGTH) {
+ return Err(Error::InvalidPubKeyLen(bytes64.len()));
+ }
+
+ let mut buf = [0; PUBLIC_KEY_LENGTH + 1];
+ let mut bytes = [0; PUBLIC_KEY_LENGTH];
+ match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) {
+ Ok(PUBLIC_KEY_LENGTH) => {
+ bytes.copy_from_slice(&buf[..PUBLIC_KEY_LENGTH]);
+ }
+ Ok(_) => unreachable!(),
+ // keeping DecodePartial gets annoying lifetime-wise
+ Err(_) => return Err(Error::DecodeError(input.to_string())),
+ }
+
+ let verifying_key = VerifyingKey::from_bytes(&bytes).map_err(Error::InvalidVerifyingKey)?;
+
+ Ok(Self {
+ name: name.to_string(),
+ verifying_key,
+ })
+ }
+
+ pub fn name(&self) -> &str {
+ &self.name
+ }
+
+ /// Verify that the passed-in signature is a correct signature for the
+ /// passed-in fingerprint, and that it is signed by the key material
+ /// referred to by [Self]: the name in the signature has to match, and the
+ /// signature bytes themselves need to be a valid signature made by the
+ /// signing key identified by [Self::verifying_key].
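+ ///
+ /// A sketch (assuming a parsed `narinfo`; the signature base64 here is
+ /// shortened/hypothetical):
+ ///
+ /// ```rust,ignore
+ /// let key = PubKey::parse("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=")?;
+ /// let sig = Signature::parse("cache.nixos.org-1:TsTTb3WG...")?;
+ /// assert!(key.verify(&narinfo.fingerprint(), &sig));
+ /// ```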
+ pub fn verify(&self, fingerprint: &str, signature: &Signature) -> bool { + if self.name() != signature.name() { + return false; + } + + return signature.verify(fingerprint.as_bytes(), &self.verifying_key); + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Invalid name: {0}")] + InvalidName(String), + #[error("Missing separator")] + MissingSeparator, + #[error("Invalid pubkey len: {0}")] + InvalidPubKeyLen(usize), + #[error("VerifyingKey error: {0}")] + InvalidVerifyingKey(ed25519_dalek::SignatureError), + #[error("Unable to base64-decode pubkey: {0}")] + DecodeError(String), +} + +impl Display for PubKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}:{}", + self.name, + BASE64.encode(self.verifying_key.as_bytes()) + ) + } +} + +#[cfg(test)] +mod test { + use data_encoding::BASE64; + use ed25519_dalek::PUBLIC_KEY_LENGTH; + use test_case::test_case; + + use crate::narinfo::Signature; + + use super::PubKey; + const FINGERPRINT: &str = "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n"; + + #[test_case("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", "cache.nixos.org-1", BASE64.decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=").unwrap()[..].try_into().unwrap(); "cache.nixos.org")] + #[test_case("cheesecake:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", "cheesecake", BASE64.decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=").unwrap()[..].try_into().unwrap(); "cache.nixos.org different name")] + #[test_case("test1:tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=", "test1", BASE64.decode(b"tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=").unwrap()[..].try_into().unwrap(); "test-1")] + fn parse( + input: &'static str, + exp_name: &'static str, + exp_verifying_key_bytes: &[u8; PUBLIC_KEY_LENGTH], + ) { + let pubkey = PubKey::parse(input).expect("must parse"); + assert_eq!(exp_name, pubkey.name()); + assert_eq!(exp_verifying_key_bytes, pubkey.verifying_key.as_bytes()); + } + + #[test_case("6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY="; "empty name")] + #[test_case("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY"; "missing padding")] + #[test_case("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDS"; "wrong length")] + fn parse_fail(input: &'static str) { + PubKey::parse(input).expect_err("must fail"); + } + + #[test_case("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", FINGERPRINT, "cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", true; "correct cache.nixos.org")] + #[test_case("cache.nixos.org-1:6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=", FINGERPRINT, "cache.nixos.org:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", false; "wrong name mismatch")] + fn verify( + pubkey_str: &'static str, + fingerprint: &'static str, + signature_str: &'static str, + expected: bool, + ) { + let pubkey = PubKey::parse(pubkey_str).expect("must parse"); + let signature = Signature::parse(signature_str).expect("must parse"); + + assert_eq!(expected, pubkey.verify(fingerprint, &signature)); + } +} diff --git a/tvix/nix-compat/src/narinfo/signature.rs 
b/tvix/nix-compat/src/narinfo/signature.rs new file mode 100644 index 000000000000..1f4a90c26c2e --- /dev/null +++ b/tvix/nix-compat/src/narinfo/signature.rs @@ -0,0 +1,132 @@
+use std::fmt::{self, Display};
+
+use data_encoding::BASE64;
+use ed25519_dalek::SIGNATURE_LENGTH;
+
+#[derive(Debug)]
+pub struct Signature<'a> {
+ name: &'a str,
+ bytes: [u8; SIGNATURE_LENGTH],
+}
+
+impl<'a> Signature<'a> {
+ pub fn new(name: &'a str, bytes: [u8; SIGNATURE_LENGTH]) -> Self {
+ Self { name, bytes }
+ }
+
+ pub fn parse(input: &'a str) -> Result<Self, Error> {
+ let (name, bytes64) = input.split_once(':').ok_or(Error::MissingSeparator)?;
+
+ if name.is_empty()
+ || !name
+ .chars()
+ .all(|c| char::is_alphanumeric(c) || c == '-' || c == '.')
+ {
+ return Err(Error::InvalidName(name.to_string()));
+ }
+
+ if bytes64.len() != BASE64.encode_len(SIGNATURE_LENGTH) {
+ return Err(Error::InvalidSignatureLen(bytes64.len()));
+ }
+
+ let mut bytes = [0; SIGNATURE_LENGTH];
+ let mut buf = [0; SIGNATURE_LENGTH + 2];
+ match BASE64.decode_mut(bytes64.as_bytes(), &mut buf) {
+ Ok(SIGNATURE_LENGTH) => bytes.copy_from_slice(&buf[..SIGNATURE_LENGTH]),
+ Ok(_) => unreachable!(),
+ // keeping DecodePartial gets annoying lifetime-wise
+ Err(_) => return Err(Error::DecodeError(input.to_string())),
+ }
+
+ Ok(Signature { name, bytes })
+ }
+
+ pub fn name(&self) -> &'a str {
+ self.name
+ }
+
+ pub fn bytes(&self) -> &[u8; SIGNATURE_LENGTH] {
+ &self.bytes
+ }
+
+ /// For a given fingerprint and ed25519 verifying key, checks whether the
+ /// signature is valid.
+ pub fn verify(&self, fingerprint: &[u8], verifying_key: &ed25519_dalek::VerifyingKey) -> bool {
+ let signature = ed25519_dalek::Signature::from_bytes(self.bytes());
+
+ verifying_key.verify_strict(fingerprint, &signature).is_ok()
+ }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+ #[error("Invalid name: {0}")]
+ InvalidName(String),
+ #[error("Missing separator")]
+ MissingSeparator,
+ #[error("Invalid signature len: expected {} b64-encoded, got {}", BASE64.encode_len(SIGNATURE_LENGTH), .0)]
+ InvalidSignatureLen(usize),
+ #[error("Unable to base64-decode signature: {0}")]
+ DecodeError(String),
+}
+
+impl Display for Signature<'_> {
+ fn fmt(&self, w: &mut fmt::Formatter) -> fmt::Result {
+ write!(w, "{}:{}", self.name, BASE64.encode(&self.bytes))
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use data_encoding::BASE64;
+ use ed25519_dalek::VerifyingKey;
+ use lazy_static::lazy_static;
+
+ use super::Signature;
+ use test_case::test_case;
+
+ const FINGERPRINT: &str = "1;/nix/store/syd87l2rxw8cbsxmxl853h0r6pdwhwjr-curl-7.82.0-bin;sha256:1b4sb93wp679q4zx9k1ignby1yna3z7c4c2ri3wphylbc2dwsys0;196040;/nix/store/0jqd0rlxzra1rs38rdxl43yh6rxchgc6-curl-7.82.0,/nix/store/6w8g7njm4mck5dmjxws0z1xnrxvl81xa-glibc-2.34-115,/nix/store/j5jxw3iy7bbz4a57fh9g2xm2gxmyal8h-zlib-1.2.12,/nix/store/yxvjs9drzsphm9pcf42a4byzj1kb9m7k-openssl-1.1.1n";
+
+ // The signing key labelled as `cache.nixos.org-1`.
+ lazy_static! {
+ static ref PUB_CACHE_NIXOS_ORG_1: VerifyingKey = ed25519_dalek::VerifyingKey::from_bytes(
+ BASE64
+ .decode(b"6NCHdD59X431o0gWypbMrAURkbJ16ZPMQFGspcDShjY=")
+ .unwrap()[..]
+ .try_into()
+ .unwrap()
+ )
+ .unwrap();
+ static ref PUB_TEST_1: VerifyingKey = ed25519_dalek::VerifyingKey::from_bytes(
+ BASE64
+ .decode(b"tLAEn+EeaBUJYqEpTd2yeerr7Ic6+0vWe+aXL/vYUpE=")
+ .unwrap()[..]
+ .try_into() + .unwrap() + ) + .unwrap(); + } + + #[test_case(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true; "valid cache.nixos.org-1")] + #[test_case(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true; "valid test1")] + #[test_case(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-2:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, true; "valid cache.nixos.org different name")] + #[test_case(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb000000000000000000000000ytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", FINGERPRINT, false; "fail invalid cache.nixos.org-1 signature")] + #[test_case(&PUB_CACHE_NIXOS_ORG_1, &"cache.nixos.org-1:TsTTb3WGTZKphvYdBHXwo6weVILmTytUjLB+vcX89fOjjRicCHmKA4RCPMVLkj6TMJ4GMX3HPVWRdD1hkeKZBQ==", &FINGERPRINT[0..5], false; "fail valid sig but wrong fp cache.nixos.org-1")] + fn verify_sigs( + verifying_key: &VerifyingKey, + sig_str: &'static str, + fp: &str, + expect_valid: bool, + ) { + let sig = Signature::parse(sig_str).expect("must parse"); + assert_eq!(expect_valid, sig.verify(fp.as_bytes(), verifying_key)); + } + + #[test_case("cache.nixos.org-1:o1DTsjCz0PofLJ216P2RBuSulI8BAb6zHxWE4N+tzlcELk5Uk/GO2SCxWTRN5wJutLZZ+cHTMdWqOHF8"; "wrong length")] + #[test_case("test\n:u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw=="; "wrong name newline")] + #[test_case("test :u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw=="; "wrong name space")] + #[test_case(":u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw=="; "empty name")] + #[test_case("u01BybwQhyI5H1bW1EIWXssMDhDDIvXOG5uh8Qzgdyjz6U1qg6DHhMAvXZOUStIj6X5t4/ufFgR8i3fjf0bMAw=="; "b64 only")] + fn parse_fail(input: &'static str) { + Signature::parse(input).expect_err("must fail"); + } +} diff --git a/tvix/nix-compat/src/nixbase32.rs b/tvix/nix-compat/src/nixbase32.rs new file mode 100644 index 000000000000..c4c2f2b96761 --- /dev/null +++ b/tvix/nix-compat/src/nixbase32.rs @@ -0,0 +1,203 @@ +//! Implements the slightly odd "base32" encoding that's used in Nix. +//! +//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648), +//! encoding to "nix base32" doesn't use any padding, and reads in characters +//! in reverse order. +//! +//! This is also the main reason why we can't use `data_encoding::Encoding` - +//! it gets things wrong if there normally would be a need for padding. 
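+//!
+//! For example (mirroring the tests below), the single byte `0x1f` encodes
+//! to `"0z"`:
+//!
+//! ```rust
+//! assert_eq!(nix_compat::nixbase32::encode(&[0x1f]), "0z");
+//! assert_eq!(nix_compat::nixbase32::decode(b"0z").unwrap(), [0x1f]);
+//! ```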
+ +use std::fmt::Write; + +use data_encoding::{DecodeError, DecodeKind}; + +const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz"; + +/// Returns encoded input +pub fn encode(input: &[u8]) -> String { + let output_len = encode_len(input.len()); + let mut output = String::with_capacity(output_len); + + for n in (0..output_len).rev() { + let b = n * 5; // bit offset within the entire input + let i = b / 8; // input byte index + let j = b % 8; // bit offset within that input byte + + // 5-bit words aren't aligned to bytes + // we can only read byte-aligned units + // read 16 bits then shift and mask to 5 + let c = { + let mut word = input[i] as u16; + if let Some(&msb) = input.get(i + 1) { + word |= (msb as u16) << 8; + } + (word >> j) & 0x1f + }; + + output.write_char(ALPHABET[c as usize] as char).unwrap(); + } + + output +} + +/// This maps a nixbase32-encoded character to its binary representation, which +/// is also the index of the character in the alphabet. Invalid characters are +/// mapped to 0xFF, which is itself an invalid value. +const BASE32_ORD: [u8; 256] = { + let mut ord = [0xFF; 256]; + let mut alphabet = ALPHABET.as_slice(); + let mut i = 0; + + while let &[c, ref tail @ ..] = alphabet { + ord[c as usize] = i; + alphabet = tail; + i += 1; + } + + ord +}; + +/// Returns decoded input +pub fn decode(input: impl AsRef<[u8]>) -> Result<Vec<u8>, DecodeError> { + let input = input.as_ref(); + + let output_len = decode_len(input.len()); + let mut output: Vec<u8> = vec![0x00; output_len]; + + decode_inner(input, &mut output)?; + Ok(output) +} + +pub fn decode_fixed<const K: usize>(input: impl AsRef<[u8]>) -> Result<[u8; K], DecodeError> { + let input = input.as_ref(); + + if input.len() != encode_len(K) { + return Err(DecodeError { + position: input.len().min(encode_len(K)), + kind: DecodeKind::Length, + }); + } + + let mut output = [0; K]; + decode_inner(input, &mut output)?; + Ok(output) +} + +fn decode_inner(input: &[u8], output: &mut [u8]) -> Result<(), DecodeError> { + // loop over all characters in reverse, and keep the iteration count in n. + let mut carry = 0; + let mut mask = 0; + for (n, &c) in input.iter().rev().enumerate() { + let b = n * 5; + let i = b / 8; + let j = b % 8; + + let digit = BASE32_ORD[c as usize]; + let value = (digit as u16) << j; + output[i] |= value as u8 | carry; + carry = (value >> 8) as u8; + + mask |= digit; + } + + if mask == 0xFF { + return Err(DecodeError { + position: find_invalid(input), + kind: DecodeKind::Symbol, + }); + } + + // if we're at the end, but have a nonzero carry, the encoding is invalid. + if carry != 0 { + return Err(DecodeError { + position: 0, + kind: DecodeKind::Trailing, + }); + } + + Ok(()) +} + +fn find_invalid(input: &[u8]) -> usize { + for (i, &c) in input.iter().enumerate() { + if !ALPHABET.contains(&c) { + return i; + } + } + + unreachable!() +} + +/// Returns the decoded length of an input of length len. 
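+/// e.g. `decode_len(32) == 20`: 32 nixbase32 characters hold a 20-byte
+/// (160-bit) store path digest.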
+pub const fn decode_len(len: usize) -> usize {
+ (len * 5) / 8
+}
+
+/// Returns the encoded length of an input of length len
+pub const fn encode_len(len: usize) -> usize {
+ (len * 8 + 4) / 5
+}
+
+#[cfg(test)]
+mod tests {
+ use hex_literal::hex;
+ use test_case::test_case;
+
+ #[test_case("", &[]; "empty bytes")]
+ #[test_case("0z", &hex!("1f"); "one byte")]
+ #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", &hex!("8a12321522fd91efbd60ebb2481af88580f61600"); "store path")]
+ #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", &hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"); "sha256")]
+ fn encode(enc: &str, dec: &[u8]) {
+ assert_eq!(enc, super::encode(dec));
+ }
+
+ #[test_case("", Some(&[]) ; "empty bytes")]
+ #[test_case("0z", Some(&hex!("1f")); "one byte")]
+ #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(&hex!("8a12321522fd91efbd60ebb2481af88580f61600")); "store path")]
+ #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(&hex!("b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30")); "sha256")]
+ // this is invalid encoding, because it encodes 10 1-bits, so the carry
+ // would be 2 1-bits
+ #[test_case("zz", None; "invalid encoding-1")]
+ // this is an even more specific example - it'd decode as 00000000 11
+ #[test_case("c0", None; "invalid encoding-2")]
+
+ fn decode(enc: &str, dec: Option<&[u8]>) {
+ match dec {
+ Some(dec) => {
+ // The decode needs to match what's passed in dec
+ assert_eq!(dec, super::decode(enc).unwrap());
+ }
+ None => {
+ // the decode needs to be an error
+ assert!(super::decode(enc).is_err());
+ }
+ }
+ }
+
+ #[test]
+ fn decode_fixed() {
+ assert_eq!(
+ super::decode_fixed("00bgd045z0d4icpbc2yyz4gx48ak44la").unwrap(),
+ hex!("8a12321522fd91efbd60ebb2481af88580f61600")
+ );
+ assert_eq!(
+ super::decode_fixed::<32>("00").unwrap_err(),
+ super::DecodeError {
+ position: 2,
+ kind: super::DecodeKind::Length
+ }
+ );
+ }
+
+ #[test]
+ fn encode_len() {
+ assert_eq!(super::encode_len(0), 0);
+ assert_eq!(super::encode_len(20), 32);
+ }
+
+ #[test]
+ fn decode_len() {
+ assert_eq!(super::decode_len(0), 0);
+ assert_eq!(super::decode_len(32), 20);
+ }
+} diff --git a/tvix/nix-compat/src/nixhash/algos.rs b/tvix/nix-compat/src/nixhash/algos.rs new file mode 100644 index 000000000000..f189eba09db6 --- /dev/null +++ b/tvix/nix-compat/src/nixhash/algos.rs @@ -0,0 +1,56 @@
+use std::fmt::Display;
+
+use serde::{Deserialize, Serialize};
+
+use crate::nixhash::Error;
+
+/// These are the hash algorithms supported by cppnix.
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
+pub enum HashAlgo {
+ Md5,
+ Sha1,
+ Sha256,
+ Sha512,
+}
+
+impl HashAlgo {
+ // return the number of bytes in the digest of the given hash algo.
+ pub fn digest_length(&self) -> usize {
+ match self {
+ HashAlgo::Sha1 => 20,
+ HashAlgo::Sha256 => 32,
+ HashAlgo::Sha512 => 64,
+ HashAlgo::Md5 => 16,
+ }
+ }
+}
+
+impl Display for HashAlgo {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match &self {
+ HashAlgo::Md5 => write!(f, "md5"),
+ HashAlgo::Sha1 => write!(f, "sha1"),
+ HashAlgo::Sha256 => write!(f, "sha256"),
+ HashAlgo::Sha512 => write!(f, "sha512"),
+ }
+ }
+}
+
+/// TODO(Raito): this could be automated via macros, I suppose.
+/// But this may be more expensive than just doing it by hand
+/// and ensuring that is kept in sync.
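+/// A hypothetical sanity check for that sync (a sketch, not part of this
+/// change):
+/// ```rust,ignore
+/// for name in SUPPORTED_ALGOS {
+///     assert!(HashAlgo::try_from(name).is_ok());
+/// }
+/// ```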
+pub const SUPPORTED_ALGOS: [&str; 4] = ["md5", "sha1", "sha256", "sha512"];
+
+impl TryFrom<&str> for HashAlgo {
+ type Error = Error;
+
+ fn try_from(algo_str: &str) -> Result<Self, Self::Error> {
+ match algo_str {
+ "md5" => Ok(Self::Md5),
+ "sha1" => Ok(Self::Sha1),
+ "sha256" => Ok(Self::Sha256),
+ "sha512" => Ok(Self::Sha512),
+ _ => Err(Error::InvalidAlgo(algo_str.to_string())),
+ }
+ }
+} diff --git a/tvix/nix-compat/src/nixhash/ca_hash.rs b/tvix/nix-compat/src/nixhash/ca_hash.rs new file mode 100644 index 000000000000..0d6c68d7fa7f --- /dev/null +++ b/tvix/nix-compat/src/nixhash/ca_hash.rs @@ -0,0 +1,213 @@
+use crate::nixbase32;
+use crate::nixhash::{self, HashAlgo, NixHash};
+use serde::de::Unexpected;
+use serde::ser::SerializeMap;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use serde_json::{Map, Value};
+use std::borrow::Cow;
+
+use super::algos::SUPPORTED_ALGOS;
+use super::from_algo_and_digest;
+
+/// A Nix CAHash describes a content-addressed hash of a path.
+///
+/// The way Nix prints it as a string is a bit confusing, but there's essentially
+/// three modes, `Flat`, `Nar` and `Text`.
+/// `Flat` and `Nar` support all 4 algos that [NixHash] supports
+/// (sha1, md5, sha256, sha512), `Text` only supports sha256.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum CAHash {
+ Flat(NixHash), // "fixed flat"
+ Nar(NixHash), // "fixed recursive"
+ Text([u8; 32]), // "text", only supports sha256
+}
+
+impl CAHash {
+ pub fn digest(&self) -> Cow<NixHash> {
+ match *self {
+ CAHash::Flat(ref digest) => Cow::Borrowed(digest),
+ CAHash::Nar(ref digest) => Cow::Borrowed(digest),
+ CAHash::Text(digest) => Cow::Owned(NixHash::Sha256(digest)),
+ }
+ }
+
+ /// Constructs a [CAHash] from the textual representation,
+ /// which is one of the three:
+ /// - `text:sha256:$nixbase32sha256digest`
+ /// - `fixed:r:$algo:$nixbase32digest`
+ /// - `fixed:$algo:$nixbase32digest`
+ /// which is the format that's used in the NARInfo for example.
+ pub fn from_nix_hex_str(s: &str) -> Option<Self> {
+ let (tag, s) = s.split_once(':')?;
+
+ match tag {
+ "text" => {
+ let digest = s.strip_prefix("sha256:")?;
+ let digest = nixbase32::decode_fixed(digest).ok()?;
+ Some(CAHash::Text(digest))
+ }
+ "fixed" => {
+ if let Some(s) = s.strip_prefix("r:") {
+ NixHash::from_nix_hex_str(s).map(CAHash::Nar)
+ } else {
+ NixHash::from_nix_hex_str(s).map(CAHash::Flat)
+ }
+ }
+ _ => None,
+ }
+ }
+
+ /// Formats a [CAHash] in the Nix default hash format, which is the format
+ /// that's used in NARInfos for example.
+ pub fn to_nix_nixbase32_string(&self) -> String {
+ match self {
+ CAHash::Flat(nh) => format!("fixed:{}", nh.to_nix_nixbase32_string()),
+ CAHash::Nar(nh) => format!("fixed:r:{}", nh.to_nix_nixbase32_string()),
+ CAHash::Text(digest) => {
+ format!("text:sha256:{}", nixbase32::encode(digest))
+ }
+ }
+ }
+
+ /// This takes a serde_json::Map and turns it into this structure. This is necessary to do such
+ /// shenanigans because we have external consumers, like the Derivation parser, who would like to
+ /// know whether we have an invalid or a missing [CAHash] structure in another structure,
+ /// e.g. Output.
+ /// This means we have this combinatorial situation:
+ /// - no hash, no hashAlgo: no [CAHash] so we return Ok(None).
+ /// - present hash, missing hashAlgo: invalid, we will return missing_field
+ /// - missing hash, present hashAlgo: same
+ /// - present hash, present hashAlgo: either we return ourselves or a type/value validation
+ /// error.
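+ /// For example (hex digest shortened here), `{"hash": "08813c...", "hashAlgo": "r:sha256"}`
+ /// maps to [CAHash::Nar], while the same value without the `r:` prefix maps
+ /// to [CAHash::Flat].
+ ///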
+ /// This function is for internal consumption regarding those needs until we have a better
+ /// solution. That said, let's explain how this works.
+ ///
+ /// We want to map the serde data model into a [CAHash].
+ ///
+ /// The serde data model has a `hash` field (containing a digest, decoded
+ /// below as lowercase hex),
+ /// and a `hashAlgo` field, containing the stringified hash algo.
+ /// In case the hash is recursive, hashAlgo also has an `r:` prefix.
+ ///
+ /// This is to match how the `nix show-derivation` command shows them in JSON
+ /// representation.
+ pub(crate) fn from_map<'de, D>(map: &Map<String, Value>) -> Result<Option<Self>, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ // If we have neither hash nor hashAlgo, let's just return None.
+ if !map.contains_key("hash") && !map.contains_key("hashAlgo") {
+ return Ok(None);
+ }
+
+ let digest: Vec<u8> = {
+ if let Some(v) = map.get("hash") {
+ if let Some(s) = v.as_str() {
+ data_encoding::HEXLOWER
+ .decode(s.as_bytes())
+ .map_err(|e| serde::de::Error::custom(e.to_string()))?
+ } else {
+ return Err(serde::de::Error::invalid_type(
+ Unexpected::Other(&v.to_string()),
+ &"a string",
+ ));
+ }
+ } else {
+ return Err(serde::de::Error::missing_field(
+ "couldn't extract `hash` key but `hashAlgo` key present",
+ ));
+ }
+ };
+
+ if let Some(v) = map.get("hashAlgo") {
+ if let Some(s) = v.as_str() {
+ match s.strip_prefix("r:") {
+ Some(rest) => Ok(Some(Self::Nar(
+ from_algo_and_digest(
+ HashAlgo::try_from(rest).map_err(|e| {
+ serde::de::Error::invalid_value(
+ Unexpected::Other(&e.to_string()),
+ &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(),
+ )
+ })?,
+ &digest,
+ )
+ .map_err(|e: nixhash::Error| {
+ serde::de::Error::invalid_value(
+ Unexpected::Other(&e.to_string()),
+ &"a digest with right length",
+ )
+ })?,
+ ))),
+ None => Ok(Some(Self::Flat(
+ from_algo_and_digest(
+ HashAlgo::try_from(s).map_err(|e| {
+ serde::de::Error::invalid_value(
+ Unexpected::Other(&e.to_string()),
+ &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(),
+ )
+ })?,
+ &digest,
+ )
+ .map_err(|e: nixhash::Error| {
+ serde::de::Error::invalid_value(
+ Unexpected::Other(&e.to_string()),
+ &"a digest with right length",
+ )
+ })?,
+ ))),
+ }
+ } else {
+ Err(serde::de::Error::invalid_type(
+ Unexpected::Other(&v.to_string()),
+ &"a string",
+ ))
+ }
+ } else {
+ Err(serde::de::Error::missing_field(
+ "couldn't extract `hashAlgo` key, but `hash` key present",
+ ))
+ }
+ }
+}
+
+impl Serialize for CAHash {
+ /// map a CAHash into the serde data model.
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+ where
+ S: Serializer,
+ {
+ let mut map = serializer.serialize_map(Some(2))?;
+ match self {
+ CAHash::Flat(h) => {
+ map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?;
+ map.serialize_entry("hashAlgo", &h.algo())?;
+ }
+ CAHash::Nar(h) => {
+ map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?;
+ map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo()))?;
+ }
+ // It is not legal for derivations to use this (which is where we're
+ // currently exercising [Serialize] mostly), but it's still good to be
+ // able to serialize other CA hashes too.
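+ // (For Text, the hashAlgo is emitted simply as "text"; the digest
+ // itself is always sha256.)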
+ CAHash::Text(h) => {
+ map.serialize_entry("hash", &nixbase32::encode(h.as_ref()))?;
+ map.serialize_entry("hashAlgo", "text")?;
+ }
+ };
+ map.end()
+ }
+}
+
+impl<'de> Deserialize<'de> for CAHash {
+ fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+ where
+ D: Deserializer<'de>,
+ {
+ let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?;
+
+ match value {
+ None => Err(serde::de::Error::custom("couldn't parse as map")),
+ Some(v) => Ok(v),
+ }
+ }
+}
diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs
new file mode 100644
index 000000000000..e148eb72ee9c
--- /dev/null
+++ b/tvix/nix-compat/src/nixhash/mod.rs
@@ -0,0 +1,530 @@
+use crate::nixbase32;
+use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER};
+use thiserror;
+
+mod algos;
+mod ca_hash;
+
+pub use algos::HashAlgo;
+pub use ca_hash::CAHash;
+
+/// NixHash represents hashes known by Nix.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum NixHash {
+ Md5([u8; 16]),
+ Sha1([u8; 20]),
+ Sha256([u8; 32]),
+ Sha512(Box<[u8; 64]>),
+}
+
+/// Convenience Result type for all nixhash parsing Results.
+pub type Result<V> = std::result::Result<V, Error>;
+
+impl NixHash {
+ /// Returns the algo as [HashAlgo].
+ pub fn algo(&self) -> HashAlgo {
+ match self {
+ NixHash::Md5(_) => HashAlgo::Md5,
+ NixHash::Sha1(_) => HashAlgo::Sha1,
+ NixHash::Sha256(_) => HashAlgo::Sha256,
+ NixHash::Sha512(_) => HashAlgo::Sha512,
+ }
+ }
+
+ /// Returns the digest as a variable-length byte slice.
+ pub fn digest_as_bytes(&self) -> &[u8] {
+ match self {
+ NixHash::Md5(digest) => digest,
+ NixHash::Sha1(digest) => digest,
+ NixHash::Sha256(digest) => digest,
+ NixHash::Sha512(digest) => digest.as_ref(),
+ }
+ }
+
+ /// Constructs a [NixHash] from an `$algo:$digest` string, decoding the
+ /// digest as nixbase32 (the inverse of [Self::to_nix_nixbase32_string]).
+ pub fn from_nix_hex_str(s: &str) -> Option<Self> {
+ let (tag, digest) = s.split_once(':')?;
+
+ (match tag {
+ "md5" => nixbase32::decode_fixed(digest).map(NixHash::Md5),
+ "sha1" => nixbase32::decode_fixed(digest).map(NixHash::Sha1),
+ "sha256" => nixbase32::decode_fixed(digest).map(NixHash::Sha256),
+ "sha512" => nixbase32::decode_fixed(digest)
+ .map(Box::new)
+ .map(NixHash::Sha512),
+ _ => return None,
+ })
+ .ok()
+ }
+
+ /// Formats a [NixHash] in the Nix default hash format,
+ /// which is the algo, followed by a colon, then the lower hex encoded digest.
+ pub fn to_nix_hex_string(&self) -> String {
+ format!(
+ "{}:{}",
+ self.algo(),
+ HEXLOWER.encode(self.digest_as_bytes())
+ )
+ }
+
+ /// Formats a [NixHash] in the format that's used inside CAHash,
+ /// which is the algo, followed by a colon, then the nixbase32-encoded digest.
+ pub(crate) fn to_nix_nixbase32_string(&self) -> String {
+ format!(
+ "{}:{}",
+ self.algo(),
+ nixbase32::encode(self.digest_as_bytes())
+ )
+ }
+}
+
+impl TryFrom<(HashAlgo, &[u8])> for NixHash {
+ type Error = Error;
+
+ /// Constructs a new [NixHash] by specifying [HashAlgo] and digest.
+ /// It can fail if the passed digest length doesn't match what's expected for
+ /// the passed algo.
+ fn try_from(value: (HashAlgo, &[u8])) -> Result<Self> {
+ let (algo, digest) = value;
+ from_algo_and_digest(algo, digest)
+ }
+}
+
+/// Constructs a new [NixHash] by specifying [HashAlgo] and digest.
+/// It can fail if the passed digest length doesn't match what's expected for
+/// the passed algo.
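+///
+/// A minimal usage sketch (assuming this module is exposed as
+/// `nix_compat::nixhash`; marked `ignore` as it is illustrative only):
+///
+/// ```ignore
+/// use nix_compat::nixhash::{from_algo_and_digest, HashAlgo};
+///
+/// // a 32-byte digest matches what sha256 expects
+/// let h = from_algo_and_digest(HashAlgo::Sha256, &[0u8; 32]).unwrap();
+/// assert_eq!(HashAlgo::Sha256, h.algo());
+/// ```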
+pub fn from_algo_and_digest(algo: HashAlgo, digest: &[u8]) -> Result<NixHash> {
+ if digest.len() != algo.digest_length() {
+ return Err(Error::InvalidEncodedDigestLength(digest.len(), algo));
+ }
+
+ Ok(match algo {
+ HashAlgo::Md5 => NixHash::Md5(digest.try_into().unwrap()),
+ HashAlgo::Sha1 => NixHash::Sha1(digest.try_into().unwrap()),
+ HashAlgo::Sha256 => NixHash::Sha256(digest.try_into().unwrap()),
+ HashAlgo::Sha512 => NixHash::Sha512(Box::new(digest.try_into().unwrap())),
+ })
+}
+
+/// Errors related to NixHash construction.
+#[derive(Debug, Eq, PartialEq, thiserror::Error)]
+pub enum Error {
+ #[error("invalid hash algo: {0}")]
+ InvalidAlgo(String),
+ #[error("invalid SRI string: {0}")]
+ InvalidSRI(String),
+ #[error("invalid encoded digest length '{0}' for algo {1}")]
+ InvalidEncodedDigestLength(usize, HashAlgo),
+ #[error("invalid base16 encoding: {0}")]
+ InvalidBase16Encoding(data_encoding::DecodeError),
+ #[error("invalid base32 encoding: {0}")]
+ InvalidBase32Encoding(data_encoding::DecodeError),
+ #[error("invalid base64 encoding: {0}")]
+ InvalidBase64Encoding(data_encoding::DecodeError),
+ #[error("conflicting hash algo: {0} (hash_algo) vs {1} (inline)")]
+ ConflictingHashAlgos(HashAlgo, HashAlgo),
+ #[error("missing inline hash algo, and no externally-specified algo: {0}")]
+ MissingInlineHashAlgo(String),
+}
+
+/// Nix allows specifying hashes in various encodings, and magically just
+/// derives the encoding.
+/// This function parses strings to a NixHash.
+///
+/// Hashes can be:
+/// - Nix hash strings
+/// - SRI hashes
+/// - bare digests
+///
+/// The encoding for Nix hash strings or bare digests can be:
+/// - base16 (lowerhex)
+/// - nixbase32
+/// - base64 (StdEncoding)
+///
+/// The encoding is derived from the length of the string and the hash type.
+/// The hash algo is communicated out-of-band, but might also be in-band (in
+/// the case of a Nix hash string or SRI), in which case it needs to be
+/// consistent with the one communicated out-of-band.
+pub fn from_str(s: &str, algo_str: Option<&str>) -> Result<NixHash> {
+ // if algo_str is some, parse or bail out
+ let algo: Option<HashAlgo> = if let Some(algo_str) = algo_str {
+ Some(algo_str.try_into()?)
+ } else {
+ None
+ };
+
+ // Peek at the beginning of the string to detect SRI hashes.
+ if s.starts_with("sha1-")
+ || s.starts_with("sha256-")
+ || s.starts_with("sha512-")
+ || s.starts_with("md5-")
+ {
+ let parsed_nixhash = from_sri_str(s)?;
+
+ // ensure the algo matches with what has been passed externally, if so.
+ if let Some(algo) = algo {
+ if algo != parsed_nixhash.algo() {
+ return Err(Error::ConflictingHashAlgos(algo, parsed_nixhash.algo()));
+ }
+ }
+ return Ok(parsed_nixhash);
+ }
+
+ // Peek at the beginning again to see if it's a Nix hash string.
+ if s.starts_with("sha1:")
+ || s.starts_with("sha256:")
+ || s.starts_with("sha512:")
+ || s.starts_with("md5:")
+ {
+ let parsed_nixhash = from_nix_str(s)?;
+ // ensure the algo matches with what has been passed externally, if so.
+ if let Some(algo) = algo {
+ if algo != parsed_nixhash.algo() {
+ return Err(Error::ConflictingHashAlgos(algo, parsed_nixhash.algo()));
+ }
+ }
+ return Ok(parsed_nixhash);
+ }
+
+ // Neither of these, so assume a bare digest; there MUST be an externally-passed algo.
+ match algo {
+ // Fail if there isn't.
+ None => Err(Error::MissingInlineHashAlgo(s.to_string())),
+ Some(algo) => decode_digest(s.as_bytes(), algo),
+ }
+}
+
+/// Parses a Nix hash string ($algo:$digest) to a NixHash.
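+///
+/// A minimal sketch (assuming this module is exposed as
+/// `nix_compat::nixhash`; the digest is the sha256 test digest used in the
+/// tests below; marked `ignore` as it is illustrative only):
+///
+/// ```ignore
+/// let h = nix_compat::nixhash::from_nix_str(
+///     "sha256:a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39",
+/// )
+/// .unwrap();
+/// assert_eq!("sha256", h.algo().to_string());
+/// ```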
+pub fn from_nix_str(s: &str) -> Result<NixHash> {
+ if let Some(rest) = s.strip_prefix("sha1:") {
+ decode_digest(rest.as_bytes(), HashAlgo::Sha1)
+ } else if let Some(rest) = s.strip_prefix("sha256:") {
+ decode_digest(rest.as_bytes(), HashAlgo::Sha256)
+ } else if let Some(rest) = s.strip_prefix("sha512:") {
+ decode_digest(rest.as_bytes(), HashAlgo::Sha512)
+ } else if let Some(rest) = s.strip_prefix("md5:") {
+ decode_digest(rest.as_bytes(), HashAlgo::Md5)
+ } else {
+ Err(Error::InvalidAlgo(s.to_string()))
+ }
+}
+
+/// Parses a Nix SRI string to a NixHash.
+/// Contrary to the SRI spec, Nix doesn't support SRI strings with multiple hashes,
+/// only supports sha256 and sha512 from the spec, and additionally supports
+/// sha1 and md5.
+/// It also deals with padding in a rather peculiar way - accepting SRI strings
+/// with an arbitrary amount of padding at the end, including *more* padding
+/// characters than the encoding calls for.
+pub fn from_sri_str(s: &str) -> Result<NixHash> {
+ // try to find the first occurrence of "-"
+ let idx = s
+ .as_bytes()
+ .iter()
+ .position(|&e| e == b'-')
+ .ok_or_else(|| Error::InvalidSRI(s.to_string()))?;
+
+ // try to map the part before that `-` to a supported hash algo:
+ let algo: HashAlgo = s[..idx].try_into()?;
+
+ // the rest should be the digest (as Nix doesn't support more than one hash in an SRI string).
+ let encoded_digest = &s[idx + 1..];
+
+ // strip all padding characters.
+ let encoded_digest = encoded_digest.trim_end_matches('=');
+
+ // As we are using BASE64_NOPAD, we must also disable the trailing bit
+ // checking, otherwise we are bound to reject some inputs.
+ // See the `sha256_weird_base64` test below.
+ let mut spec = BASE64_NOPAD.specification();
+ spec.check_trailing_bits = false;
+ let encoder = spec
+ .encoding()
+ .expect("Tvix bug: failed to get the special base64 encoder for Nix SRI hashes");
+
+ if encoded_digest.len() == encoder.encode_len(algo.digest_length()) {
+ let digest = encoder
+ .decode(encoded_digest.as_bytes())
+ .map_err(Error::InvalidBase64Encoding)?;
+
+ Ok(from_algo_and_digest(algo, &digest).unwrap())
+ } else {
+ Err(Error::InvalidEncodedDigestLength(encoded_digest.len(), algo))?
+ }
+}
+
+/// Decode a plain digest depending on the hash algo specified externally.
+/// hexlower, nixbase32 and base64 encodings are supported - the encoding is
+/// inferred from the input length.
+fn decode_digest(s: &[u8], algo: HashAlgo) -> Result<NixHash> {
+ // For the chosen hash algo, pick the encoding whose encoded length matches
+ // the input length, and decode the digest with it.
+ let digest = if s.len() == HEXLOWER.encode_len(algo.digest_length()) {
+ HEXLOWER
+ .decode(s.as_ref())
+ .map_err(Error::InvalidBase16Encoding)?
+ } else if s.len() == nixbase32::encode_len(algo.digest_length()) {
+ nixbase32::decode(s).map_err(Error::InvalidBase32Encoding)?
+ } else if s.len() == BASE64.encode_len(algo.digest_length()) {
+ BASE64
+ .decode(s.as_ref())
+ .map_err(Error::InvalidBase64Encoding)?
+ } else {
+ Err(Error::InvalidEncodedDigestLength(s.len(), algo))?
+ }; + + Ok(from_algo_and_digest(algo, &digest).unwrap()) +} + +#[cfg(test)] +mod tests { + use crate::{ + nixbase32, + nixhash::{self, HashAlgo, NixHash}, + }; + use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; + use hex_literal::hex; + use test_case::test_case; + + const DIGEST_SHA1: [u8; 20] = hex!("6016777997c30ab02413cf5095622cd7924283ac"); + const DIGEST_SHA256: [u8; 32] = + hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"); + const DIGEST_SHA512: [u8; 64] = hex!("ab40d0be3541f0774bba7815d13d10b03252e96e95f7dbb4ee99a3b431c21662fd6971a020160e39848aa5f305b9be0f78727b2b0789e39f124d21e92b8f39ef"); + const DIGEST_MD5: [u8; 16] = hex!("c4874a8897440b393d862d8fd459073f"); + + fn to_base16(digest: &[u8]) -> String { + HEXLOWER.encode(digest) + } + + fn to_nixbase32(digest: &[u8]) -> String { + nixbase32::encode(digest) + } + + fn to_base64(digest: &[u8]) -> String { + BASE64.encode(digest) + } + + fn to_base64_nopad(digest: &[u8]) -> String { + BASE64_NOPAD.encode(digest) + } + + // TODO + fn make_nixhash(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}:{}", algo, digest_encoded) + } + fn make_sri_string(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}-{}", algo, digest_encoded) + } + + /// Test parsing a hash string in various formats, and also when/how the out-of-band algo is needed. + #[test_case(&NixHash::Sha1(DIGEST_SHA1); "sha1")] + #[test_case(&NixHash::Sha256(DIGEST_SHA256); "sha256")] + #[test_case(&NixHash::Sha512(Box::new(DIGEST_SHA512)); "sha512")] + #[test_case(&NixHash::Md5(DIGEST_MD5); "md5")] + fn from_str(expected_hash: &NixHash) { + let algo = &expected_hash.algo(); + let digest = expected_hash.digest_as_bytes(); + // parse SRI + { + // base64 without out-of-band algo + let s = make_sri_string(algo, to_base64(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64 with out-of-band-algo + let s = make_sri_string(algo, to_base64(digest)); + let h = nixhash::from_str(&s, Some(&expected_hash.algo().to_string())) + .expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64_nopad without out-of-band algo + let s = make_sri_string(algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, &h); + + // base64_nopad with out-of-band-algo + let s = make_sri_string(algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain base16. should succeed with algo out-of-band, but fail without. + { + let s = to_base16(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain nixbase32. should succeed with algo out-of-band, but fail without. + { + let s = to_nixbase32(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse plain base64. should succeed with algo out-of-band, but fail without. + { + let s = to_base64(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, &h); + } + + // parse Nix hash strings + { + // base16. should succeed with both algo out-of-band and in-band. 
+ {
+ let s = make_nixhash(algo, to_base16(digest));
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, None).expect("must succeed")
+ );
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed")
+ );
+ }
+ // nixbase32. should succeed with both algo out-of-band and in-band.
+ {
+ let s = make_nixhash(algo, to_nixbase32(digest));
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, None).expect("must succeed")
+ );
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed")
+ );
+ }
+ // base64. should succeed with both algo out-of-band and in-band.
+ {
+ let s = make_nixhash(algo, to_base64(digest));
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, None).expect("must succeed")
+ );
+ assert_eq!(
+ expected_hash,
+ &nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed")
+ );
+ }
+ }
+ }
+
+ /// Test parsing an SRI hash via the [nixhash::from_sri_str] method.
+ #[test]
+ fn from_sri_str() {
+ let nix_hash = nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=")
+ .expect("must succeed");
+
+ assert_eq!(HashAlgo::Sha256, nix_hash.algo());
+ assert_eq!(
+ &hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"),
+ nix_hash.digest_as_bytes()
+ )
+ }
+
+ /// Test parsing a sha512 SRI hash with various paddings; Nix accepts all of them.
+ #[test_case("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ"; "no padding")]
+ #[test_case("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ="; "wrong padding")]
+ #[test_case("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ=="; "correct padding")]
+ #[test_case("sha512-7g91TBvYoYQorRTqo+rYD/i5YnWvUBLnqDhPHxBJDaBW7smuPMeRp6E6JOFuVN9bzN0QnH1ToUU0u9c2CjALEQ==="; "too much padding")]
+ fn from_sri_str_sha512_paddings(sri_str: &str) {
+ let nix_hash = nixhash::from_sri_str(sri_str).expect("must succeed");
+
+ assert_eq!(HashAlgo::Sha512, nix_hash.algo());
+ assert_eq!(
+ &hex!("ee0f754c1bd8a18428ad14eaa3ead80ff8b96275af5012e7a8384f1f10490da056eec9ae3cc791a7a13a24e16e54df5bccdd109c7d53a14534bbd7360a300b11"),
+ nix_hash.digest_as_bytes()
+ )
+ }
+
+ /// Ensure we detect truncated base64 digests, where the digest size
+ /// doesn't match what's expected from that hash function.
+ #[test]
+ fn from_sri_str_truncated() {
+ nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank")
+ .expect_err("must fail");
+ }
+
+ /// Ensure we fail on SRI hashes that Nix doesn't support.
+ #[test]
+ fn from_sri_str_unsupported() {
+ nixhash::from_sri_str(
+ "sha384-o4UVSl89mIB0sFUK+3jQbG+C9Zc9dRlV/Xd3KAvXEbhqxu0J5OAdg6b6VHKHwQ7U",
+ )
+ .expect_err("must fail");
+ }
+
+ /// Ensure we reject invalid base64 encoding.
+ #[test]
+ fn from_sri_str_invalid_base64() {
+ nixhash::from_sri_str("sha256-invalid=base64").expect_err("must fail");
+ }
+
+ /// Ensure we reject SRI strings with multiple hashes, as Nix doesn't support that.
+ #[test]
+ fn from_sri_str_unsupported_multiple() {
+ nixhash::from_sri_str("sha256-ngth6szLtC1IJIYyz3lhftzL8SkrJkqPyPve+dGqa1Y= sha512-q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==").expect_err("must fail");
+ }
+
+ /// Nix also accepts SRI strings with missing padding, but only if the
+ /// string is expressed as SRI, so it still needs to have a `sha256-` prefix.
+ ///
+ /// This seems to work both with and without specifying the hash algo
+ /// out-of-band (hash = "sha256-…" or sha256 = "sha256-…").
+ ///
+ /// Passing the same broken base64 string, but not as SRI, while passing
+ /// the hash algo out-of-band does not work.
+ #[test]
+ fn sha256_broken_padding() {
+ let broken_base64 = "fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8";
+ // the digest one would get if the string were padded with a trailing '='
+ let expected_digest =
+ hex!("7e022bdd3c851830173f9faaa006a230a0e0fdad4c953e85bff4bf0da036e12f");
+
+ // passing hash algo out of band should succeed
+ let nix_hash = nixhash::from_str(&format!("sha256-{}", &broken_base64), Some("sha256"))
+ .expect("must succeed");
+ assert_eq!(&expected_digest, &nix_hash.digest_as_bytes());
+
+ // not passing hash algo out of band should succeed
+ let nix_hash =
+ nixhash::from_str(&format!("sha256-{}", &broken_base64), None).expect("must succeed");
+ assert_eq!(&expected_digest, &nix_hash.digest_as_bytes());
+
+ // not passing SRI, but hash algo out of band should fail
+ nixhash::from_str(broken_base64, Some("sha256")).expect_err("must fail");
+ }
+
+ /// As we decided to parse our hashes by trimming `=` completely,
+ /// we need to take into account hashes with padding requirements whose
+ /// encodings contain trailing bits that would be checked by `BASE64_NOPAD`
+ /// and would make the verification fail.
+ ///
+ /// This base64 has a trailing non-zero bit at bit 42.
+ #[test]
+ fn sha256_weird_base64() {
+ let weird_base64 = "syceJMUEknBDCHK8eGs6rUU3IQn+HnQfURfCrDxYPa9=";
+ let expected_digest =
+ hex!("b3271e24c5049270430872bc786b3aad45372109fe1e741f5117c2ac3c583daf");
+
+ let nix_hash = nixhash::from_str(&format!("sha256-{}", &weird_base64), Some("sha256"))
+ .expect("must succeed");
+ assert_eq!(&expected_digest, &nix_hash.digest_as_bytes());
+
+ // not passing hash algo out of band should succeed
+ let nix_hash =
+ nixhash::from_str(&format!("sha256-{}", &weird_base64), None).expect("must succeed");
+ assert_eq!(&expected_digest, &nix_hash.digest_as_bytes());
+
+ // not passing SRI, but hash algo out of band should fail
+ nixhash::from_str(weird_base64, Some("sha256")).expect_err("must fail");
+ }
+}
diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs
new file mode 100644
index 000000000000..e23002bfb97e
--- /dev/null
+++ b/tvix/nix-compat/src/store_path/mod.rs
@@ -0,0 +1,430 @@
+use crate::nixbase32;
+use data_encoding::{DecodeError, BASE64};
+use std::{
+ fmt,
+ path::PathBuf,
+ str::{self, FromStr},
+};
+use thiserror;
+
+#[cfg(target_family = "unix")]
+use std::os::unix::ffi::OsStringExt;
+
+mod utils;
+
+pub use utils::*;
+
+pub const DIGEST_SIZE: usize = 20;
+pub const ENCODED_DIGEST_SIZE: usize = nixbase32::encode_len(DIGEST_SIZE);
+
+// The store dir prefix, without trailing slash.
+// That's usually where the Nix store is mounted.
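+//
+// Note: the store location is configurable in Nix itself; this crate assumes
+// the conventional default below.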
+pub const STORE_DIR: &str = "/nix/store";
+pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/";
+
+/// Errors that can occur when parsing a literal store path
+#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+pub enum Error {
+ #[error("Dash is missing between hash and name")]
+ MissingDash,
+ #[error("Hash encoding is invalid: {0}")]
+ InvalidHashEncoding(DecodeError),
+ #[error("Invalid length")]
+ InvalidLength,
+ #[error(
+ "Invalid name: \"{}\", character at position {} is invalid",
+ std::str::from_utf8(.0).unwrap_or(&BASE64.encode(.0)),
+ .1,
+ )]
+ InvalidName(Vec<u8>, u8),
+ #[error("Tried to parse an absolute path which was missing the store dir prefix.")]
+ MissingStoreDir,
+}
+
+/// Represents a path in the Nix store (a direct child of [STORE_DIR]).
+///
+/// It consists of a digest (20 bytes), and a name, which is a string.
+/// The name may only contain ASCII alphanumeric characters, or one of the
+/// following: `-`, `_`, `.`, `+`, `?`, `=`.
+/// The name is usually used to describe the pname and version of a package.
+/// Derivation paths can also be represented as store paths; their names just
+/// end with the `.drv` suffix.
+///
+/// A [StorePath] does not encode any additional subpath "inside" the store
+/// path.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct StorePath {
+ digest: [u8; DIGEST_SIZE],
+ name: String,
+}
+
+impl StorePath {
+ pub fn digest(&self) -> &[u8; DIGEST_SIZE] {
+ &self.digest
+ }
+
+ pub fn name(&self) -> &str {
+ self.name.as_ref()
+ }
+}
+
+impl PartialOrd for StorePath {
+ fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+ Some(self.cmp(other))
+ }
+}
+
+impl Ord for StorePath {
+ fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+ self.digest.cmp(&other.digest)
+ }
+}
+
+impl FromStr for StorePath {
+ type Err = Error;
+
+ /// Construct a [StorePath] by passing the `$digest-$name` string
+ /// that comes after [STORE_DIR_WITH_SLASH].
+ fn from_str(s: &str) -> Result<Self, Self::Err> {
+ Self::from_bytes(s.as_bytes())
+ }
+}
+
+impl StorePath {
+ /// Construct a [StorePath] by passing the `$digest-$name` string
+ /// that comes after [STORE_DIR_WITH_SLASH].
+ pub fn from_bytes(s: &[u8]) -> Result<StorePath, Error> {
+ Ok(StorePathRef::from_bytes(s)?.to_owned())
+ }
+
+ /// Decompose a string into a [StorePath] and a [PathBuf] containing the
+ /// rest of the path, or an error.
+ #[cfg(target_family = "unix")]
+ pub fn from_absolute_path_full(s: &str) -> Result<(StorePath, PathBuf), Error> {
+ // strip [STORE_DIR_WITH_SLASH] from s
+ match s.strip_prefix(STORE_DIR_WITH_SLASH) {
+ None => Err(Error::MissingStoreDir),
+ Some(rest) => {
+ // put rest in a PathBuf
+ let mut p = PathBuf::new();
+ p.push(rest);
+
+ let mut it = p.components();
+
+ // The first component of the rest must be parse-able as a [StorePath]
+ if let Some(first_component) = it.next() {
+ // convert first component to StorePath
+ let first_component_bytes = first_component.as_os_str().to_owned().into_vec();
+ let store_path = StorePath::from_bytes(&first_component_bytes)?;
+ // collect rest
+ let rest_buf: PathBuf = it.collect();
+ Ok((store_path, rest_buf))
+ } else {
+ Err(Error::InvalidLength) // Well, or missing "/"?
+ }
+ }
+ }
+ }
+
+ /// Returns an absolute store path string.
+ /// That is just the string representation, prefixed with the store prefix
+ /// ([STORE_DIR_WITH_SLASH]).
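+ /// (For example
+ /// `/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432`,
+ /// as exercised in the tests below.)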
+ pub fn to_absolute_path(&self) -> String {
+ let sp_ref: StorePathRef = self.into();
+ sp_ref.to_absolute_path()
+ }
+}
+
+/// Like [StorePath], but without a heap allocation for the name.
+/// Used by [StorePath] for parsing.
+#[derive(Debug, Eq, PartialEq)]
+pub struct StorePathRef<'a> {
+ digest: [u8; DIGEST_SIZE],
+ name: &'a str,
+}
+
+impl<'a> From<&'a StorePath> for StorePathRef<'a> {
+ fn from(&StorePath { digest, ref name }: &'a StorePath) -> Self {
+ StorePathRef {
+ digest,
+ name: name.as_ref(),
+ }
+ }
+}
+
+impl<'a> StorePathRef<'a> {
+ pub fn digest(&self) -> &[u8; DIGEST_SIZE] {
+ &self.digest
+ }
+
+ pub fn name(&self) -> &'a str {
+ self.name
+ }
+
+ pub fn to_owned(&self) -> StorePath {
+ StorePath {
+ digest: self.digest,
+ name: self.name.to_owned(),
+ }
+ }
+
+ /// Construct a [StorePathRef] from a name and digest.
+ pub fn from_name_and_digest(name: &'a str, digest: &[u8]) -> Result<Self, Error> {
+ Ok(Self {
+ name: validate_name(name.as_bytes())?,
+ digest: digest.try_into().map_err(|_| Error::InvalidLength)?,
+ })
+ }
+
+ /// Construct a [StorePathRef] from an absolute store path string.
+ /// This is equivalent to calling [StorePathRef::from_bytes], but stripping
+ /// the [STORE_DIR_WITH_SLASH] prefix before.
+ pub fn from_absolute_path(s: &'a [u8]) -> Result<Self, Error> {
+ match s.strip_prefix(STORE_DIR_WITH_SLASH.as_bytes()) {
+ Some(s_stripped) => Self::from_bytes(s_stripped),
+ None => Err(Error::MissingStoreDir),
+ }
+ }
+
+ /// Construct a [StorePathRef] by passing the `$digest-$name` string
+ /// that comes after [STORE_DIR_WITH_SLASH].
+ pub fn from_bytes(s: &'a [u8]) -> Result<Self, Error> {
+ // the whole string needs to be at least:
+ //
+ // - 32 characters (encoded hash)
+ // - 1 dash
+ // - 1 character for the name
+ if s.len() < ENCODED_DIGEST_SIZE + 2 {
+ Err(Error::InvalidLength)?
+ }
+
+ let digest = nixbase32::decode_fixed(&s[..ENCODED_DIGEST_SIZE])
+ .map_err(Error::InvalidHashEncoding)?;
+
+ if s[ENCODED_DIGEST_SIZE] != b'-' {
+ return Err(Error::MissingDash);
+ }
+
+ Ok(StorePathRef {
+ digest,
+ name: validate_name(&s[ENCODED_DIGEST_SIZE + 1..])?,
+ })
+ }
+
+ /// Returns an absolute store path string.
+ /// That is just the string representation, prefixed with the store prefix
+ /// ([STORE_DIR_WITH_SLASH]).
+ pub fn to_absolute_path(&self) -> String {
+ format!("{}{}", STORE_DIR_WITH_SLASH, self)
+ }
+}
+
+/// NAME_CHARS contains `true` for bytes that are valid in store path names,
+/// not accounting for '.' being permitted only past the first character.
+static NAME_CHARS: [bool; 256] = {
+ let mut tbl = [false; 256];
+ let mut c = 0;
+
+ loop {
+ tbl[c as usize] = matches!(c, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'+' | b'-' | b'_' | b'?' | b'=' | b'.');
+
+ if c == u8::MAX {
+ break;
+ }
+
+ c += 1;
+ }
+
+ tbl
+};
+
+/// Checks a given &[u8] to match the restrictions for [StorePath::name], and
+/// returns the name as a string if successful.
+pub(crate) fn validate_name(s: &(impl AsRef<[u8]> + ?Sized)) -> Result<&str, Error> {
+ let s = s.as_ref();
+
+ // Empty or excessively long names are not allowed.
+ if s.is_empty() || s.len() > 211 {
+ return Err(Error::InvalidLength);
+ }
+
+ if s[0] == b'.'
{ + return Err(Error::InvalidName(s.to_vec(), 0)); + } + + let mut valid = true; + for &c in s { + valid = valid && NAME_CHARS[c as usize]; + } + + if !valid { + for (i, &c) in s.iter().enumerate() { + if !NAME_CHARS[c as usize] { + return Err(Error::InvalidName(s.to_vec(), i as u8)); + } + } + + unreachable!(); + } + + // SAFETY: We permit a subset of ASCII, which guarantees valid UTF-8. + Ok(unsafe { str::from_utf8_unchecked(s) }) +} + +impl fmt::Display for StorePath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + StorePathRef::from(self).fmt(f) + } +} + +impl fmt::Display for StorePathRef<'_> { + /// The string representation of a store path starts with a digest (20 + /// bytes), [crate::nixbase32]-encoded, followed by a `-`, + /// and ends with the name. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::store_path::{StorePathRef, DIGEST_SIZE}; + use hex_literal::hex; + use test_case::test_case; + + use super::{Error, StorePath}; + + #[test] + fn happy_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath = StorePath::from_bytes(example_nix_path_str.as_bytes()) + .expect("Error parsing example string"); + + let expected_digest: [u8; DIGEST_SIZE] = hex!("8a12321522fd91efbd60ebb2481af88580f61600"); + + assert_eq!("net-tools-1.60_p20170221182432", nixpath.name); + assert_eq!(nixpath.digest, expected_digest); + + assert_eq!(example_nix_path_str, nixpath.to_string()) + } + + /// This is the store path rejected when `nix-store --add`'ing an + /// empty `.gitignore` file. + /// + /// Nix 2.4 accidentally dropped this behaviour, but this is considered a bug. + /// See https://github.com/NixOS/nix/pull/9095. 
+ #[test] + fn starts_with_dot() { + StorePath::from_bytes(b"fli4bwscgna7lpm7v5xgnjxrxh0yc7ra-.gitignore") + .expect_err("must fail"); + } + + #[test] + fn empty_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-").expect_err("must fail"); + } + + #[test] + fn excessive_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") + .expect_err("must fail"); + } + + #[test] + fn invalid_hash_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn invalid_encoding_hash() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn more_than_just_the_bare_nix_store_path() { + StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp", + ) + .expect_err("must fail"); + } + + #[test] + fn no_dash_between_hash_and_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn absolute_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath_expected = + StorePathRef::from_bytes(example_nix_path_str.as_bytes()).expect("must parse"); + + let nixpath_actual = StorePathRef::from_absolute_path( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432".as_bytes(), + ) + .expect("must parse"); + + assert_eq!(nixpath_expected, nixpath_actual); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + nixpath_actual.to_absolute_path(), + ); + } + + #[test] + fn absolute_path_missing_prefix() { + assert_eq!( + Error::MissingStoreDir, + StorePathRef::from_absolute_path(b"foobar-123").expect_err("must fail") + ); + } + + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix, but trailing slash")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp")) + ; "with prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp/", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp/")) + ; "with prefix and trailing slash")] + fn from_absolute_path_full(s: &str, expected: (StorePath, PathBuf)) { + let actual = StorePath::from_absolute_path_full(s).expect("must succeed"); + assert_eq!(expected, actual); + } + + #[test] + fn from_absolute_path_errors() { + assert_eq!( + Error::InvalidLength, + StorePath::from_absolute_path_full("/nix/store/").expect_err("must fail") + ); + assert_eq!( + Error::InvalidLength, + StorePath::from_absolute_path_full("/nix/store/foo").expect_err("must fail") + ); 
+ assert_eq!(
+ Error::MissingStoreDir,
+ StorePath::from_absolute_path_full(
+ "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"
+ )
+ .expect_err("must fail")
+ );
+ }
+}
diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs
new file mode 100644
index 000000000000..0b75ef50647c
--- /dev/null
+++ b/tvix/nix-compat/src/store_path/utils.rs
@@ -0,0 +1,296 @@
+use crate::nixbase32;
+use crate::nixhash::{CAHash, NixHash};
+use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR};
+use sha2::{Digest, Sha256};
+use std::io::Write;
+use thiserror;
+
+/// Errors that can occur when creating a content-addressed store path.
+///
+/// This wraps the main [crate::store_path::Error].
+#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+pub enum BuildStorePathError {
+ #[error("Invalid Store Path: {0}")]
+ InvalidStorePath(Error),
+ /// This error occurs when we have references outside the SHA-256 +
+ /// Recursive case. The restriction comes from upstream Nix. It may be
+ /// lifted at some point, but there isn't a pressing need to anticipate that.
+ #[error("References were not supported as much as requested")]
+ InvalidReference(),
+}
+
+/// compress_hash takes an arbitrarily long sequence of bytes (usually
+/// a hash digest), and returns a sequence of bytes of length
+/// OUTPUT_SIZE.
+///
+/// It's calculated by rotating through the bytes in the output buffer
+/// (zero-initialized), and XOR'ing with each byte of the passed
+/// input. It consumes 1 byte at a time, and XOR's it with the current
+/// value in the output buffer.
+///
+/// This mimics equivalent functionality in C++ Nix.
+pub fn compress_hash<const OUTPUT_SIZE: usize>(input: &[u8]) -> [u8; OUTPUT_SIZE] {
+ let mut output = [0; OUTPUT_SIZE];
+
+ for (ii, ch) in input.iter().enumerate() {
+ output[ii % OUTPUT_SIZE] ^= ch;
+ }
+
+ output
+}
+
+/// This builds a store path by calculating the text_hash_string of either a
+/// derivation or a literal text file that may contain references.
+/// If you don't want to have to pass the entire contents, you might want to use
+/// [build_ca_path] instead.
+pub fn build_text_path<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>(
+ name: &str,
+ content: C,
+ references: I,
+) -> Result<StorePathRef<'_>, BuildStorePathError> {
+ // produce the sha256 digest of the contents
+ let content_digest = Sha256::new_with_prefix(content).finalize().into();
+
+ build_ca_path(name, &CAHash::Text(content_digest), references, false)
+}
+
+/// This builds a store path from a [CAHash] and a list of references.
+pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
+ name: &'a str,
+ ca_hash: &CAHash,
+ references: I,
+ self_reference: bool,
+) -> Result<StorePathRef<'a>, BuildStorePathError> {
+ // self references are only allowed for CAHash::Nar(NixHash::Sha256(_)).
+ if self_reference {
+ let CAHash::Nar(NixHash::Sha256(_)) = ca_hash else {
+ return Err(BuildStorePathError::InvalidReference());
+ };
+ }
+
+ let (ty, hash) = match &ca_hash {
+ CAHash::Text(ref digest) => (
+ make_references_string("text", references, false),
+ NixHash::Sha256(*digest),
+ ),
+ CAHash::Nar(NixHash::Sha256(ref digest)) => (
+ make_references_string("source", references, self_reference),
+ NixHash::Sha256(*digest),
+ ),
+
+ // for all other CAHash::Nar, another custom scheme is used.
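+ // (The scheme mirrors C++ Nix: the inner hash is rendered as
+ // `fixed:out:r:<algo>:<lowerhex digest>:`, hashed with sha256, and the
+ // result is then used like a sha256 NAR hash for path calculation.)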
+ CAHash::Nar(ref hash) => {
+ if references.into_iter().next().is_some() {
+ return Err(BuildStorePathError::InvalidReference());
+ }
+
+ (
+ "output:out".to_string(),
+ NixHash::Sha256(
+ Sha256::new_with_prefix(format!("fixed:out:r:{}:", hash.to_nix_hex_string()))
+ .finalize()
+ .into(),
+ ),
+ )
+ }
+ // CAHash::Flat uses something very similar, except for the `r:` prefix.
+ CAHash::Flat(ref hash) => {
+ if references.into_iter().next().is_some() {
+ return Err(BuildStorePathError::InvalidReference());
+ }
+
+ (
+ "output:out".to_string(),
+ NixHash::Sha256(
+ Sha256::new_with_prefix(format!("fixed:out:{}:", hash.to_nix_hex_string()))
+ .finalize()
+ .into(),
+ ),
+ )
+ }
+ };
+
+ build_store_path_from_fingerprint_parts(&ty, &hash, name)
+ .map_err(BuildStorePathError::InvalidStorePath)
+}
+
+/// For a given NAR sha256 digest and name, return the new [StorePathRef] this
+/// would have, or an error in case the name is invalid.
+pub fn build_nar_based_store_path<'a>(
+ nar_sha256_digest: &[u8; 32],
+ name: &'a str,
+) -> Result<StorePathRef<'a>, BuildStorePathError> {
+ let nar_hash_with_mode = CAHash::Nar(NixHash::Sha256(nar_sha256_digest.to_owned()));
+
+ build_ca_path(name, &nar_hash_with_mode, Vec::<String>::new(), false)
+}
+
+/// This builds an input-addressed store path.
+///
+/// Input-addressed store paths are always derivation outputs; the "input" in
+/// question is the derivation and its closure.
+pub fn build_output_path<'a>(
+ drv_hash: &NixHash,
+ output_name: &str,
+ output_path_name: &'a str,
+) -> Result<StorePathRef<'a>, Error> {
+ build_store_path_from_fingerprint_parts(
+ &(String::from("output:") + output_name),
+ drv_hash,
+ output_path_name,
+ )
+}
+
+/// This builds a store path from fingerprint parts.
+/// Usually, this function is used from [build_text_path] and
+/// passed a "text hash string" (starting with "text:" as fingerprint),
+/// but other fingerprints starting with "output:" are also used in Derivation
+/// output path calculation.
+///
+/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes,
+/// and nixbase32-encoded (32 characters).
+fn build_store_path_from_fingerprint_parts<'a>(
+ ty: &str,
+ hash: &NixHash,
+ name: &'a str,
+) -> Result<StorePathRef<'a>, Error> {
+ let digest: [u8; DIGEST_SIZE] = compress_hash(&{
+ let mut h = Sha256::new();
+ write!(h, "{ty}:{}:{STORE_DIR}:{name}", hash.to_nix_hex_string()).unwrap();
+ h.finalize()
+ });
+
+ // name validation happens in here.
+ StorePathRef::from_name_and_digest(name, &digest)
+}
+
+/// This contains the Nix logic to create "text hash strings", which are used
+/// in `builtins.toFile`, as well as in Derivation Path calculation.
+///
+/// A text hash is calculated by concatenating the following fields, separated by a `:`:
+///
+/// - text
+/// - references, individually joined by `:`
+/// - the nix_hash_string representation of the sha256 digest of some contents
+/// - the value of `storeDir`
+/// - the name
+fn make_references_string<S: AsRef<str>, I: IntoIterator<Item = S>>(
+ ty: &str,
+ references: I,
+ self_ref: bool,
+) -> String {
+ let mut s = String::from(ty);
+
+ for reference in references {
+ s.push(':');
+ s.push_str(reference.as_ref());
+ }
+
+ if self_ref {
+ s.push_str(":self");
+ }
+
+ s
+}
+
+/// Nix placeholders (i.e. values returned by `builtins.placeholder`)
+/// are used to populate outputs with paths that must be
+/// string-replaced with the actual output paths later, at runtime.
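+///
+/// For example, `hash_placeholder("out")` should match
+/// `builtins.placeholder "out"` in Nix, which evaluates to
+/// `/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9`.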
+/// +/// The actual placeholder is basically just a SHA256 hash encoded in +/// cppnix format. +pub fn hash_placeholder(name: &str) -> String { + let digest = Sha256::new_with_prefix(format!("nix-output:{}", name)).finalize(); + + format!("/{}", nixbase32::encode(&digest)) +} + +#[cfg(test)] +mod test { + use hex_literal::hex; + + use super::*; + use crate::nixhash::{CAHash, NixHash}; + + #[test] + fn build_text_path_with_zero_references() { + // This hash should match `builtins.toFile`, e.g.: + // + // nix-repl> builtins.toFile "foo" "bar" + // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + + let store_path = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("build_store_path() should succeed"); + + assert_eq!( + store_path.to_absolute_path().as_str(), + "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + ); + } + + #[test] + fn build_text_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + + let inner = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("path_with_references() should succeed"); + let inner_path = inner.to_absolute_path(); + + let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()]) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + ); + } + + #[test] + fn build_sha1_path() { + let outer = build_ca_path( + "bar", + &CAHash::Nar(NixHash::Sha1(hex!( + "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" + ))), + Vec::<String>::new(), + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + ); + } + + #[test] + fn build_store_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + // + // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz + // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz' + let outer = build_ca_path( + "baz", + &CAHash::Nar(NixHash::Sha256( + nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1") + .expect("nixbase32 should decode") + .try_into() + .expect("should have right len"), + )), + vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"], + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz" + ); + } +} diff --git a/tvix/nix-compat/testdata/narinfo.zst b/tvix/nix-compat/testdata/narinfo.zst new file mode 100644 index 000000000000..361a422da86e --- /dev/null +++ b/tvix/nix-compat/testdata/narinfo.zst Binary files differ |