diff options
Diffstat (limited to 'users/zseri')
-rw-r--r-- | users/zseri/.gitignore | 2 | ||||
-rw-r--r-- | users/zseri/OWNERS | 3 | ||||
-rw-r--r-- | users/zseri/dbwospof.md | 112 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/.gitignore | 1 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/Cargo.toml | 6 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/default.nix | 49 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/fuzz/.gitignore | 2 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/fuzz/Cargo.lock | 44 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/fuzz/Cargo.toml | 36 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/fuzz/fuzz_targets/hbm-roundtrip.rs | 10 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/fuzz/fuzz_targets/nocrash.rs | 9 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/src/hbm.rs | 167 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/src/lib.rs | 215 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/src/spec.rs | 40 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/tests.nix | 32 |
15 files changed, 728 insertions, 0 deletions
diff --git a/users/zseri/.gitignore b/users/zseri/.gitignore new file mode 100644 index 000000000000..b8553ace5532 --- /dev/null +++ b/users/zseri/.gitignore @@ -0,0 +1,2 @@ +.#* +target/ diff --git a/users/zseri/OWNERS b/users/zseri/OWNERS new file mode 100644 index 000000000000..4f565712149c --- /dev/null +++ b/users/zseri/OWNERS @@ -0,0 +1,3 @@ +inherited: false +owners: + - zseri diff --git a/users/zseri/dbwospof.md b/users/zseri/dbwospof.md new file mode 100644 index 000000000000..f1d68cde069c --- /dev/null +++ b/users/zseri/dbwospof.md @@ -0,0 +1,112 @@ +# distributed build without single points of failure + +## problem statement +> If we want to distribute a build across several build nodes, and want to avoid +> a "single point of failure", what needs to be considered? + +## motivation + +* distribute the build across several build nodes, because some packages take + extremely long to build + (e.g. `firefox`, `thunderbird`, `qtwebengine`, `webkitgtk`, ...) +* avoid a centralised setup like e.g. with Hydra, because we want to keep using + an on-demand workflow as usual with Nix + (e.g. `nixos-rebuild` on each host when necessary). + +## list of abbreviations + +<dl> + <dt>CA</dt> <dd>content-addressed</dd> + <dt>drv</dt> <dd>derivation</dd> + <dt>FOD</dt> <dd>fixed-output derivation</dd> + <dt>IHA</dt> <dd>input-hash-addressed</dd> + <dt>inhash</dt> <dd>input hash</dd> + <dt>outhash</dt> <dd>output hash</dd> +</dl> + +## build graph + +The build graph can't be easily distributed. It is instead left on the coordinator, +and the build nodes just get individual build jobs, which just consist of +derivations (and some information about how to get the inputs from some central +or distributed store (e.g. Ceph), this may be transmitted "out of band"). 
+ +## inhash-exclusive + +It is necessary that each derivation build is exclusive in the sense that +the same derivation is never built multiple times simultaneously, because +this otherwise wastes compute resources (obviously) and, in the case +of non-deterministic builds, increases complexity +(the store needs to decide which result to prefer, and the build nodes with +"losing" build results need to pull the "winning" build results from the store, +replacing the local version). Although this might be unnecessary in case +of IHA drvs, enforcing it always reduces the amount of possible surprising +results when mixing CA drvs and IHA drvs. + +## what can be meaningfully distributed + +The following is strongly opinionated, but I consider the following +(based upon the original build graph implementation from yzix 12.2021): +* We can't push the entire build graph to each build node, because they would + overlap 100%, and thus create extreme contention on the inhash-exclusive lock +* We could try to split the build graph into multiple parts with independent + inputs (partitioning), but this can be really complex, and I'm not sure + if it is worth it... This also basically excludes the yzix node types + [ `Eval`, `AssertEqual` ] (should be done by the evaluator). + Implementing this option however would make an abort of a build graph + (the simple variant does not kill running tasks, + just stops new tasks from being scheduled) really hard, and complex to get right. +* It does not make sense to distribute "node tasks" across build nodes which + almost exclusively interact with the store, and are not CPU-bound, but I/O bound. + This applies to most, if not all, useful FODs. It applies to the yzix node types + [ `Dump`, `UnDump`, `Fetch`, `Require` ] (should be performed by evaluator+store). +* TODO: figure out how to do forced rebuilds (e.g. 
also prefer a node which is not + the build node of the previous realisation of that task) + +## coarse per-derivation workflow + +``` + derivation + | | + | | + key build + | | + | | + V V + inhash outhash + | (either CA or IHA) + \ / + \ / + \ / + realisation +``` + +## build results + +Just for completeness, two build results are currently considered: + +* success: the build succeeded, and the result is uploaded to the central store +* failure: the build failed (e.g. build process terminated via error exit code or was killed) +* another case might be "partial": the build succeeded, but uploading to the + central store failed (the result is only available on the build node that built it). + This case is interesting, because we don't need to rerun the build, just the upload step + needs to be fixed/done semi-manually (e.g. maybe the central store ran out of storage, + or the network was unavailable) + +## build task queue + +It is naïve to think that something like a queue via `rabbitmq` (`AMQP`) or `MQTT` +suffices, because some requirements are missing: + +1. some way to push build results to the clients, and these should be associated + to the build inputs (a hacky way might use multiple queues for that, e.g. + a `tasks` input queue and a `done` output queue). +2. some way to lock "inhashes" (see section inhash-exclusive). + +The second point is somewhat easy to realise using `etcd`, and using the `watch` +mechanism it can be used to simulate a queue, and the inhash-addressing of +queued derivations can be seamlessly integrated. + +TODO: maybe we want to adjust the priorities of tasks in the queue, but Nix currently +doesn't seem to do this, so consider this only when it starts to make sense as a +performance or lag optimization. 
diff --git a/users/zseri/store-ref-scanner/.gitignore b/users/zseri/store-ref-scanner/.gitignore new file mode 100644 index 000000000000..5a44eef09a54 --- /dev/null +++ b/users/zseri/store-ref-scanner/.gitignore @@ -0,0 +1 @@ +/Cargo.lock diff --git a/users/zseri/store-ref-scanner/Cargo.toml b/users/zseri/store-ref-scanner/Cargo.toml new file mode 100644 index 000000000000..ad565f09af2e --- /dev/null +++ b/users/zseri/store-ref-scanner/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "store-ref-scanner" +version = "0.1.0" +edition = "2021" + +[dependencies] diff --git a/users/zseri/store-ref-scanner/default.nix b/users/zseri/store-ref-scanner/default.nix new file mode 100644 index 000000000000..38f3fd64ecd7 --- /dev/null +++ b/users/zseri/store-ref-scanner/default.nix @@ -0,0 +1,49 @@ +{ depot, lib, pkgs, ... }: + +let + sourceFilter = name: type: + let + baseName = builtins.baseNameOf (builtins.toString name); + in + (baseName == "Cargo.toml") + || (type == "directory" && baseName == "src") + || (lib.hasSuffix ".rs" baseName) + ; +in + +pkgs.buildRustCrate rec { + pname = "store-ref-scanner"; + crateName = "store-ref-scanner"; + version = "0.1.0"; + edition = "2021"; + src = lib.cleanSourceWith { filter = sourceFilter; src = ./.; }; + + passthru.tests = pkgs.buildRustCrate { + pname = "store-ref-scanner-tests"; + inherit crateName src version edition; + buildTests = true; + postInstall = '' + set -ex + export RUST_BACKTRACE=1 + # recreate a file hierarchy as when running tests with cargo + # the source for test data + # build outputs + testRoot=target/debug + mkdir -p $testRoot + chmod +w -R . 
+ # test harness executables are suffixed with a hash, + # like cargo does this allows to prevent name collision + # with the main executables of the crate + hash=$(basename $out) + ls -lasR $out + for file in $out/tests/*; do + f=$testRoot/$(basename $file)-$hash + cp $file $f + $f 2>&1 | tee -a $out/tests.log + done + rm -rf $out/tests + set +ex + ''; + }; + +} diff --git a/users/zseri/store-ref-scanner/fuzz/.gitignore b/users/zseri/store-ref-scanner/fuzz/.gitignore new file mode 100644 index 000000000000..b400c2782601 --- /dev/null +++ b/users/zseri/store-ref-scanner/fuzz/.gitignore @@ -0,0 +1,2 @@ +corpus +artifacts diff --git a/users/zseri/store-ref-scanner/fuzz/Cargo.lock b/users/zseri/store-ref-scanner/fuzz/Cargo.lock new file mode 100644 index 000000000000..7395dec05e45 --- /dev/null +++ b/users/zseri/store-ref-scanner/fuzz/Cargo.lock @@ -0,0 +1,44 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "arbitrary" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "510c76ecefdceada737ea728f4f9a84bd2e1ef29f1ba555e560940fe279954de" + +[[package]] +name = "cc" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36a9a84a6e8b55dfefb04235e55edb2b9a2a18488fcae777a6bdaa6f06f1deb3" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "store-ref-scanner" +version = "0.1.0" + +[[package]] +name = "store-ref-scanner-fuzz" +version = "0.0.0" +dependencies = [ + "libfuzzer-sys", + 
"store-ref-scanner", +] diff --git a/users/zseri/store-ref-scanner/fuzz/Cargo.toml b/users/zseri/store-ref-scanner/fuzz/Cargo.toml new file mode 100644 index 000000000000..1832be00329c --- /dev/null +++ b/users/zseri/store-ref-scanner/fuzz/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "store-ref-scanner-fuzz" +version = "0.0.0" +authors = ["Automatically generated"] +publish = false +edition = "2018" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +libfuzzer-sys = "0.4" + +[dependencies.store-ref-scanner] +path = ".." + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "hbm-roundtrip" +path = "fuzz_targets/hbm-roundtrip.rs" +test = false +doc = false + +[[bin]] +name = "nocrash" +path = "fuzz_targets/nocrash.rs" +test = false +doc = false + +[profile.release] +incremental = false +overflow-checks = true +panic = "abort" diff --git a/users/zseri/store-ref-scanner/fuzz/fuzz_targets/hbm-roundtrip.rs b/users/zseri/store-ref-scanner/fuzz/fuzz_targets/hbm-roundtrip.rs new file mode 100644 index 000000000000..9e21a7738a38 --- /dev/null +++ b/users/zseri/store-ref-scanner/fuzz/fuzz_targets/hbm-roundtrip.rs @@ -0,0 +1,10 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: [u8; 16]| { + use store_ref_scanner::HalfBytesMask; + let a = HalfBytesMask(data); + let b = a.into_expanded(); + let c = HalfBytesMask::from_expanded(b); + assert_eq!(a, c); +}); diff --git a/users/zseri/store-ref-scanner/fuzz/fuzz_targets/nocrash.rs b/users/zseri/store-ref-scanner/fuzz/fuzz_targets/nocrash.rs new file mode 100644 index 000000000000..48100a628d7a --- /dev/null +++ b/users/zseri/store-ref-scanner/fuzz/fuzz_targets/nocrash.rs @@ -0,0 +1,9 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; + +fuzz_target!(|data: &[u8]| { + use store_ref_scanner::{StoreRefScanner, StoreSpec}; + + StoreRefScanner::new(&data[..], &StoreSpec::DFL_NIX2).count(); + StoreRefScanner::new(&data[..], &StoreSpec::DFL_YZIX1).count(); +}); diff 
--git a/users/zseri/store-ref-scanner/src/hbm.rs b/users/zseri/store-ref-scanner/src/hbm.rs new file mode 100644 index 000000000000..2520efd8363d --- /dev/null +++ b/users/zseri/store-ref-scanner/src/hbm.rs @@ -0,0 +1,167 @@ +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct HalfBytesMask(pub [u8; 16]); + +#[allow(clippy::as_conversions, clippy::zero_prefixed_literal)] +impl HalfBytesMask { + pub const B32_REVSHA256: HalfBytesMask = + HalfBytesMask([0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 0, 0, 222, 127, 207, 7]); + + pub const B64_BLAKE2B256: HalfBytesMask = HalfBytesMask([ + 0, 0, 0, 0, 0, 8, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, + ]); + + pub const DFL_REST: HalfBytesMask = HalfBytesMask([ + 0, 0, 0, 0, 0, 104, 255, 163, 254, 255, 255, 135, 254, 255, 255, 7, + ]); + + #[inline] + pub const fn from_expanded(x: [bool; 128]) -> Self { + let mut ret = [0u8; 16]; + let mut idx = 0; + while idx < 16 { + let fin = idx * 8; + let mut idx2 = 0; + while idx2 < 8 { + if x[fin + idx2] { + ret[idx] += (1 << idx2) as u8; + } + idx2 += 1; + } + idx += 1; + } + Self(ret) + } + + /// create a mask by allowing all characters via the mask which are included in the given string + pub fn from_bytes(s: &[u8]) -> Self { + s.iter().fold(Self([0u8; 16]), |mut ret, &i| { + ret.set(i, true); + ret + }) + } + + pub const fn into_expanded(self) -> [bool; 128] { + let Self(ihbm) = self; + let mut ret = [false; 128]; + let mut idx = 0; + while idx < 16 { + let fin = idx * 8; + let curi = ihbm[idx]; + let mut idx2 = 0; + while idx2 < 8 { + ret[fin + idx2] = (curi >> idx2) & 0b1 != 0; + idx2 += 1; + } + idx += 1; + } + ret + } + + pub fn contains(&self, byte: u8) -> bool { + if byte >= 0x80 { + false + } else { + (self.0[usize::from(byte / 8)] >> u32::from(byte % 8)) & 0b1 != 0 + } + } + + pub fn set(&mut self, byte: u8, allow: bool) { + if byte >= 0x80 { + if cfg!(debug_assertions) { + panic!( + "tried to manipulate invalid byte {:?} in HalfBytesMask", + byte + ); + } else { + 
return; + } + } + let block = &mut self.0[usize::from(byte / 8)]; + let bitpat = (1 << u32::from(byte % 8)) as u8; + if allow { + *block |= bitpat; + } else { + *block &= !bitpat; + } + } + + #[cfg(test)] + fn count_ones(&self) -> u8 { + self.0 + .iter() + .map(|i| i.count_ones()) + .sum::<u32>() + .try_into() + .unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn maskbase() { + assert_eq!(HalfBytesMask::B32_REVSHA256.count_ones(), 32); + assert_eq!(HalfBytesMask::B64_BLAKE2B256.count_ones(), 64); + } + + #[test] + fn non_ascii() { + for i in 0x80..=0xff { + assert!(!HalfBytesMask::DFL_REST.contains(i)); + } + } + + #[test] + fn dflmask() { + assert_eq!( + HalfBytesMask::from_expanded( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ] + .map(|i| i != 0) + ), + Default::default(), + ); + + assert_eq!( + HalfBytesMask::from_expanded( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + ] + .map(|i| i != 0) + ), + HalfBytesMask::B32_REVSHA256, + ); + + assert_eq!( + HalfBytesMask::from_expanded( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, + ] + .map(|i| i != 0) + ), + HalfBytesMask::B64_BLAKE2B256, + ); + + assert_eq!( + HalfBytesMask::from_bytes( + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-._?=" + ), + HalfBytesMask::DFL_REST, + ); + } +} diff --git a/users/zseri/store-ref-scanner/src/lib.rs b/users/zseri/store-ref-scanner/src/lib.rs new file mode 100644 index 000000000000..0f86a769fe63 --- /dev/null +++ b/users/zseri/store-ref-scanner/src/lib.rs @@ -0,0 +1,215 @@ +#![no_std] +#![forbid(clippy::cast_ptr_alignment, trivial_casts, unconditional_recursion)] +#![deny(clippy::as_conversions)] + +mod hbm; +pub use hbm::HalfBytesMask; + +mod spec; +pub use spec::*; + +/// limit maximal length of store basename +const BASENAME_MAXLEN: usize = 255; + +/// this is a trait which implements the interface of possible inputs +/// (usually byte slices) +pub trait ScannerInput: AsRef<[u8]> + Sized { + /// Splits the input into two at the given index. + /// Afterwards self contains elements [at, len), and the returned input part contains elements [0, at). + fn split_to(&mut self, at: usize) -> Self; + fn finish(&mut self); +} + +impl ScannerInput for &[u8] { + fn split_to(&mut self, at: usize) -> Self { + let (a, b) = self.split_at(at); + *self = b; + a + } + + fn finish(&mut self) { + *self = &[]; + } +} + +impl ScannerInput for &mut [u8] { + fn split_to(&mut self, at: usize) -> Self { + // Lifetime dance taken from `impl Write for &mut [u8]`. + // Taken from crate `std`. + let (a, b) = core::mem::take(self).split_at_mut(at); + *self = b; + a + } + + fn finish(&mut self) { + *self = &mut []; + } +} + +/// this is the primary structure of this crate +/// +/// it represents a scanner which scans binary slices for store references, +/// and implements an iterator interfaces which returns these as byte slices. 
+pub struct StoreRefScanner<'x, Input: 'x> { + input: Input, + spec: &'x StoreSpec<'x>, +} + +impl<'x, Input> StoreRefScanner<'x, Input> +where + Input: ScannerInput + 'x, +{ + pub fn new(input: Input, spec: &'x StoreSpec<'x>) -> Self { + for i in [&spec.valid_hashbytes, &spec.valid_restbytes] { + for j in [b'\0', b' ', b'\t', b'\n', b'/', b'\\'] { + assert!(!i.contains(j)); + } + } + Self { input, spec } + } +} + +impl<'x, Input: 'x> Iterator for StoreRefScanner<'x, Input> +where + Input: ScannerInput + 'x, +{ + type Item = Input; + + fn next(&mut self) -> Option<Input> { + let hbl: usize = self.spec.hashbytes_len.into(); + 'outer: while !self.input.as_ref().is_empty() { + if !self.spec.path_to_store.is_empty() { + let p2sas = self.spec.path_to_store; + while !self.input.as_ref().starts_with(p2sas.as_bytes()) { + if self.input.as_ref().is_empty() { + break 'outer; + } + self.input.split_to(1); + } + self.input.split_to(p2sas.len()); + if self.input.as_ref().is_empty() { + break 'outer; + } + } + let hsep = matches!(self.input.as_ref().iter().next(), Some(b'/') | Some(b'\\')); + self.input.split_to(1); + if hsep && self.spec.check_rest(self.input.as_ref()) { + // we have found a valid hash + // rest contains the store basename and all following components + // now let's search for the end + // and then cut off possible following components after the basename + let rlen = self + .input + .as_ref() + .iter() + .enumerate() + .take(BASENAME_MAXLEN) + .skip(hbl) + .find(|&(_, &i)| !self.spec.valid_restbytes.contains(i)) + .map(|(eosp, _)| eosp) + .unwrap_or_else(|| core::cmp::min(BASENAME_MAXLEN, self.input.as_ref().len())); + return Some(self.input.split_to(rlen)); + } + } + self.input.finish(); + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + extern crate alloc; + use alloc::{vec, vec::Vec}; + + #[test] + fn simple_nix2() { + let drv: &[u8] = br#" + 
Derive([("out","","r:sha256","")],[("/nix/store/2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",["dev"]),("/nix/store/6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",["out"]),("/nix/store/fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",["out"]),("/nix/store/s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",["out"]),("/nix/store/xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",["out"])],["/nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs","/0sdk1r4l43yw4g6lmqdhd92vhdfhlwz3m76jxzvzsqsv63czw2km"),("builder","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash"),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck",""),("doInstallCheck",""),("makeFlags","PREFIX=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9 UDEVLIBDIR=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9/lib/udev/"),("name","bcache-tools-1.0.7"),("nativeBuildInputs","/1kw0rwgdyq9q69wmmsa5d2kap6p52b0yldbzi4w17bhcq5g5cp2f"),("out","/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("outputs","out"),("patches","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch /nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch"),("pname","bcache-tools"),("preBuild","sed -e \"s|/bin/sh|/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/sh|\" -i *.rules\n"),("preInstall","mkdir -p \"$out/sbin\" \"$out/lib/udev/rules.d\" \"$out/share/man/man8\"\n"),("prePatch","sed -e 
\"/INSTALL.*initramfs\\/hook/d\" \\\n -e \"/INSTALL.*initcpio\\/install/d\" \\\n -e \"/INSTALL.*dracut\\/module-setup.sh/d\" \\\n -e \"s/pkg-config/$PKG_CONFIG/\" \\\n -i Makefile\n"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/6izcafvfcbz19chi7hl20834g0fa043n-source"),("stdenv","/01ncyv8bxibj0imgfvmxgqy648n697bachil6aw6i46g1jk0bbds"),("strictDeps",""),("system","x86_64-linux"),("version","1.0.7")]) + "#; + // we convert everything into strings because it is way easier to compare elements in error messages + let refs: Vec<&str> = StoreRefScanner::new(drv, &StoreSpec::DFL_NIX2) + .map(|i| core::str::from_utf8(i).unwrap()) + .collect(); + let refs_expect: Vec<&[u8]> = vec![ + b"2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv", + b"6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv", + b"fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv", + b"s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv", + b"xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv", + b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch", + b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch", + b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh", + b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh", + b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch", + b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch", + b"6izcafvfcbz19chi7hl20834g0fa043n-source", + ]; + let refs_expect: Vec<&str> = refs_expect + .into_iter() + .map(|i| core::str::from_utf8(i).unwrap()) + .collect(); + assert_eq!(refs, refs_expect); + } + + #[test] + fn simple_yzix1() { + // I haven't yet produced any yzix derivation which included /yzixs absolute paths... 
+ let fake: &[u8] = br#" + /yzixs/4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys: ASCII text + /yzixs/dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A: ASCII text + /yzixs/FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI: unified diff output, ASCII text + /yzixs/g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI: unified diff output, ASCII text + /yzixs/H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I: ASCII text + /yzixs/IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94: unified diff output, ASCII text + /yzixs/IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU: POSIX shell script, ASCII text executable + /yzixs/JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4: unified diff output, ASCII text + /yzixs/LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw: unified diff output, ASCII text + /yzixs/mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ: ASCII text + /yzixs/nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA: POSIX shell script, ASCII text executable + /yzixs/nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M: unified diff output, ASCII text + /yzixs/UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY: unified diff output, ASCII text + /yzixs/VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8: unified diff output, ASCII text + /yzixs/VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc: unified diff output, ASCII text + /yzixs/W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc: unified diff output, ASCII text + /yzixs/xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k: unified diff output, ASCII text + /yzixs/ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg: ASCII text + "#; + let refs: Vec<&str> = StoreRefScanner::new(fake, &StoreSpec::DFL_YZIX1) + .map(|i| core::str::from_utf8(i).unwrap()) + .collect(); + let refs_expect: Vec<&[u8]> = vec![ + b"4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys", + b"dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A", + b"FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI", + b"g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI", + b"H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I", + b"IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94", + 
b"IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU", + b"JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4", + b"LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw", + b"mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ", + b"nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA", + b"nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M", + b"UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY", + b"VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8", + b"VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc", + b"W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc", + b"xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k", + b"ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg", + ]; + let refs_expect: Vec<&str> = refs_expect + .into_iter() + .map(|i| core::str::from_utf8(i).unwrap()) + .collect(); + assert_eq!(refs, refs_expect); + } + + #[test] + fn just_store() { + for i in [&StoreSpec::DFL_NIX2, &StoreSpec::DFL_YZIX1] { + let refs: Vec<&[u8]> = StoreRefScanner::new(i.path_to_store.as_bytes(), i).collect(); + assert!(refs.is_empty()); + } + } +} diff --git a/users/zseri/store-ref-scanner/src/spec.rs b/users/zseri/store-ref-scanner/src/spec.rs new file mode 100644 index 000000000000..79da0842c529 --- /dev/null +++ b/users/zseri/store-ref-scanner/src/spec.rs @@ -0,0 +1,40 @@ +use crate::hbm::HalfBytesMask; + +pub struct StoreSpec<'path> { + /// path to store without trailing slash + pub path_to_store: &'path str, + + /// compressed map of allowed ASCII characters in hash part + pub valid_hashbytes: HalfBytesMask, + + /// compressed map of allowed ASCII characters in part after hash + pub valid_restbytes: HalfBytesMask, + + /// exact length of hash part of store paths + pub hashbytes_len: u8, +} + +impl StoreSpec<'_> { + pub(crate) fn check_rest(&self, rest: &[u8]) -> bool { + let hbl = self.hashbytes_len.into(); + rest.iter() + .take(hbl) + .take_while(|&&i| self.valid_hashbytes.contains(i)) + .count() + == hbl + } + + pub const DFL_NIX2: StoreSpec<'static> = StoreSpec { + path_to_store: "/nix/store", + valid_hashbytes: HalfBytesMask::B32_REVSHA256, + 
valid_restbytes: HalfBytesMask::DFL_REST, + hashbytes_len: 32, + }; + + pub const DFL_YZIX1: StoreSpec<'static> = StoreSpec { + path_to_store: "/yzixs", + valid_hashbytes: HalfBytesMask::B64_BLAKE2B256, + valid_restbytes: HalfBytesMask::DFL_REST, + hashbytes_len: 43, + }; +} diff --git a/users/zseri/store-ref-scanner/tests.nix b/users/zseri/store-ref-scanner/tests.nix new file mode 100644 index 000000000000..a4c82fe3a936 --- /dev/null +++ b/users/zseri/store-ref-scanner/tests.nix @@ -0,0 +1,32 @@ +{ depot, lib, pkgs, ... }: + +let + parent = depot.users.zseri.store-ref-scanner; +in +pkgs.buildRustCrate { + pname = "store-ref-scanner-tests"; + inherit (parent) crateName src version edition; + buildTests = true; + postInstall = '' + set -ex + export RUST_BACKTRACE=1 + # recreate a file hierarchy as when running tests with cargo + # the source for test data + # build outputs + testRoot=target/debug + mkdir -p $testRoot + chmod +w -R . + # test harness executables are suffixed with a hash, + # like cargo does this allows to prevent name collision + # with the main executables of the crate + hash=$(basename $out) + ls -lasR $out + for file in $out/tests/*; do + f=$testRoot/$(basename $file)-$hash + cp $file $f + $f 2>&1 | tee -a $out/tests.log + done + rm -rf $out/tests + set +ex + ''; +} |