diff options
Diffstat (limited to 'users/zseri/store-ref-scanner/src')
-rw-r--r-- | users/zseri/store-ref-scanner/src/hbm.rs | 167 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/src/lib.rs | 215 | ||||
-rw-r--r-- | users/zseri/store-ref-scanner/src/spec.rs | 40 |
3 files changed, 0 insertions, 422 deletions
diff --git a/users/zseri/store-ref-scanner/src/hbm.rs b/users/zseri/store-ref-scanner/src/hbm.rs deleted file mode 100644 index 2520efd8363d..000000000000 --- a/users/zseri/store-ref-scanner/src/hbm.rs +++ /dev/null @@ -1,167 +0,0 @@ -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] -pub struct HalfBytesMask(pub [u8; 16]); - -#[allow(clippy::as_conversions, clippy::zero_prefixed_literal)] -impl HalfBytesMask { - pub const B32_REVSHA256: HalfBytesMask = - HalfBytesMask([0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 0, 0, 222, 127, 207, 7]); - - pub const B64_BLAKE2B256: HalfBytesMask = HalfBytesMask([ - 0, 0, 0, 0, 0, 8, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7, - ]); - - pub const DFL_REST: HalfBytesMask = HalfBytesMask([ - 0, 0, 0, 0, 0, 104, 255, 163, 254, 255, 255, 135, 254, 255, 255, 7, - ]); - - #[inline] - pub const fn from_expanded(x: [bool; 128]) -> Self { - let mut ret = [0u8; 16]; - let mut idx = 0; - while idx < 16 { - let fin = idx * 8; - let mut idx2 = 0; - while idx2 < 8 { - if x[fin + idx2] { - ret[idx] += (1 << idx2) as u8; - } - idx2 += 1; - } - idx += 1; - } - Self(ret) - } - - /// create a mask by allowing all characters via the mask which are included in the given string - pub fn from_bytes(s: &[u8]) -> Self { - s.iter().fold(Self([0u8; 16]), |mut ret, &i| { - ret.set(i, true); - ret - }) - } - - pub const fn into_expanded(self) -> [bool; 128] { - let Self(ihbm) = self; - let mut ret = [false; 128]; - let mut idx = 0; - while idx < 16 { - let fin = idx * 8; - let curi = ihbm[idx]; - let mut idx2 = 0; - while idx2 < 8 { - ret[fin + idx2] = (curi >> idx2) & 0b1 != 0; - idx2 += 1; - } - idx += 1; - } - ret - } - - pub fn contains(&self, byte: u8) -> bool { - if byte >= 0x80 { - false - } else { - (self.0[usize::from(byte / 8)] >> u32::from(byte % 8)) & 0b1 != 0 - } - } - - pub fn set(&mut self, byte: u8, allow: bool) { - if byte >= 0x80 { - if cfg!(debug_assertions) { - panic!( - "tried to manipulate invalid byte {:?} in HalfBytesMask", - byte - ); - } else { - return; - } - } - let block = &mut self.0[usize::from(byte / 8)]; - let bitpat = (1 << u32::from(byte % 8)) as u8; - if allow { - *block |= bitpat; - } else { - *block &= !bitpat; - } - } - - #[cfg(test)] - fn count_ones(&self) -> u8 { - self.0 - .iter() - .map(|i| i.count_ones()) - .sum::<u32>() - .try_into() - .unwrap() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn maskbase() { - assert_eq!(HalfBytesMask::B32_REVSHA256.count_ones(), 32); - assert_eq!(HalfBytesMask::B64_BLAKE2B256.count_ones(), 64); - } - - #[test] - fn non_ascii() { - for i in 0x80..=0xff { - assert!(!HalfBytesMask::DFL_REST.contains(i)); - } - } - - #[test] - fn dflmask() { - assert_eq!( - HalfBytesMask::from_expanded( - [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - ] - .map(|i| i != 0) - ), - Default::default(), - ); - - assert_eq!( - HalfBytesMask::from_expanded( - [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - ] - .map(|i| i != 0) - ), - HalfBytesMask::B32_REVSHA256, - ); - - assert_eq!( - HalfBytesMask::from_expanded( - [ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - ] - .map(|i| i != 0) - ), - HalfBytesMask::B64_BLAKE2B256, - ); - - assert_eq!( - HalfBytesMask::from_bytes( - b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-._?=" - ), - HalfBytesMask::DFL_REST, - ); - } -} diff --git a/users/zseri/store-ref-scanner/src/lib.rs b/users/zseri/store-ref-scanner/src/lib.rs deleted file mode 100644 index 0f86a769fe63..000000000000 --- a/users/zseri/store-ref-scanner/src/lib.rs +++ /dev/null @@ -1,215 +0,0 @@ -#![no_std] -#![forbid(clippy::cast_ptr_alignment, trivial_casts, unconditional_recursion)] -#![deny(clippy::as_conversions)] - -mod hbm; -pub use hbm::HalfBytesMask; - -mod spec; -pub use spec::*; - -/// limit maximal length of store basename -const BASENAME_MAXLEN: usize = 255; - -/// this is a trait which implements the interface of possible inputs -/// (usually byte slices) -pub trait ScannerInput: AsRef<[u8]> + Sized { - /// Splits the input into two at the given index. - /// Afterwards self contains elements [at, len), and the returned input part contains elements [0, at). - fn split_to(&mut self, at: usize) -> Self; - fn finish(&mut self); -} - -impl ScannerInput for &[u8] { - fn split_to(&mut self, at: usize) -> Self { - let (a, b) = self.split_at(at); - *self = b; - a - } - - fn finish(&mut self) { - *self = &[]; - } -} - -impl ScannerInput for &mut [u8] { - fn split_to(&mut self, at: usize) -> Self { - // Lifetime dance taken from `impl Write for &mut [u8]`. - // Taken from crate `std`. - let (a, b) = core::mem::take(self).split_at_mut(at); - *self = b; - a - } - - fn finish(&mut self) { - *self = &mut []; - } -} - -/// this is the primary structure of this crate -/// -/// it represents a scanner which scans binary slices for store references, -/// and implements an iterator interfaces which returns these as byte slices. -pub struct StoreRefScanner<'x, Input: 'x> { - input: Input, - spec: &'x StoreSpec<'x>, -} - -impl<'x, Input> StoreRefScanner<'x, Input> -where - Input: ScannerInput + 'x, -{ - pub fn new(input: Input, spec: &'x StoreSpec<'x>) -> Self { - for i in [&spec.valid_hashbytes, &spec.valid_restbytes] { - for j in [b'\0', b' ', b'\t', b'\n', b'/', b'\\'] { - assert!(!i.contains(j)); - } - } - Self { input, spec } - } -} - -impl<'x, Input: 'x> Iterator for StoreRefScanner<'x, Input> -where - Input: ScannerInput + 'x, -{ - type Item = Input; - - fn next(&mut self) -> Option<Input> { - let hbl: usize = self.spec.hashbytes_len.into(); - 'outer: while !self.input.as_ref().is_empty() { - if !self.spec.path_to_store.is_empty() { - let p2sas = self.spec.path_to_store; - while !self.input.as_ref().starts_with(p2sas.as_bytes()) { - if self.input.as_ref().is_empty() { - break 'outer; - } - self.input.split_to(1); - } - self.input.split_to(p2sas.len()); - if self.input.as_ref().is_empty() { - break 'outer; - } - } - let hsep = matches!(self.input.as_ref().iter().next(), Some(b'/') | Some(b'\\')); - self.input.split_to(1); - if hsep && self.spec.check_rest(self.input.as_ref()) { - // we have found a valid hash - // rest contains the store basename and all following components - // now let's search for the end - // and then cut off possible following components after the basename - let rlen = self - .input - .as_ref() - .iter() - .enumerate() - .take(BASENAME_MAXLEN) - .skip(hbl) - .find(|&(_, &i)| !self.spec.valid_restbytes.contains(i)) - .map(|(eosp, _)| eosp) - .unwrap_or_else(|| core::cmp::min(BASENAME_MAXLEN, self.input.as_ref().len())); - return Some(self.input.split_to(rlen)); - } - } - self.input.finish(); - None - } -} - -#[cfg(test)] -mod tests { - use super::*; - extern crate alloc; - use alloc::{vec, vec::Vec}; - - #[test] - fn simple_nix2() { - let drv: &[u8] = br#" - Derive([("out","","r:sha256","")],[("/nix/store/2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",["dev"]),("/nix/store/6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",["out"]),("/nix/store/fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",["out"]),("/nix/store/s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",["out"]),("/nix/store/xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",["out"])],["/nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs","/0sdk1r4l43yw4g6lmqdhd92vhdfhlwz3m76jxzvzsqsv63czw2km"),("builder","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash"),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck",""),("doInstallCheck",""),("makeFlags","PREFIX=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9 UDEVLIBDIR=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9/lib/udev/"),("name","bcache-tools-1.0.7"),("nativeBuildInputs","/1kw0rwgdyq9q69wmmsa5d2kap6p52b0yldbzi4w17bhcq5g5cp2f"),("out","/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("outputs","out"),("patches","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch /nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch"),("pname","bcache-tools"),("preBuild","sed -e \"s|/bin/sh|/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/sh|\" -i *.rules\n"),("preInstall","mkdir -p \"$out/sbin\" \"$out/lib/udev/rules.d\" \"$out/share/man/man8\"\n"),("prePatch","sed -e \"/INSTALL.*initramfs\\/hook/d\" \\\n -e \"/INSTALL.*initcpio\\/install/d\" \\\n -e \"/INSTALL.*dracut\\/module-setup.sh/d\" \\\n -e \"s/pkg-config/$PKG_CONFIG/\" \\\n -i Makefile\n"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/6izcafvfcbz19chi7hl20834g0fa043n-source"),("stdenv","/01ncyv8bxibj0imgfvmxgqy648n697bachil6aw6i46g1jk0bbds"),("strictDeps",""),("system","x86_64-linux"),("version","1.0.7")]) - "#; - // we convert everything into strings because it is way easier to compare elements in error messages - let refs: Vec<&str> = StoreRefScanner::new(drv, &StoreSpec::DFL_NIX2) - .map(|i| core::str::from_utf8(i).unwrap()) - .collect(); - let refs_expect: Vec<&[u8]> = vec![ - b"2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv", - b"6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv", - b"fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv", - b"s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv", - b"xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv", - b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch", - b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch", - b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh", - b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh", - b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch", - b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch", - b"6izcafvfcbz19chi7hl20834g0fa043n-source", - ]; - let refs_expect: Vec<&str> = refs_expect - .into_iter() - .map(|i| core::str::from_utf8(i).unwrap()) - .collect(); - assert_eq!(refs, refs_expect); - } - - #[test] - fn simple_yzix1() { - // I haven't yet produced any yzix derivation which included /yzixs absolute paths... - let fake: &[u8] = br#" - /yzixs/4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys: ASCII text - /yzixs/dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A: ASCII text - /yzixs/FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI: unified diff output, ASCII text - /yzixs/g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI: unified diff output, ASCII text - /yzixs/H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I: ASCII text - /yzixs/IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94: unified diff output, ASCII text - /yzixs/IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU: POSIX shell script, ASCII text executable - /yzixs/JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4: unified diff output, ASCII text - /yzixs/LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw: unified diff output, ASCII text - /yzixs/mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ: ASCII text - /yzixs/nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA: POSIX shell script, ASCII text executable - /yzixs/nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M: unified diff output, ASCII text - /yzixs/UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY: unified diff output, ASCII text - /yzixs/VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8: unified diff output, ASCII text - /yzixs/VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc: unified diff output, ASCII text - /yzixs/W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc: unified diff output, ASCII text - /yzixs/xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k: unified diff output, ASCII text - /yzixs/ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg: ASCII text - "#; - let refs: Vec<&str> = StoreRefScanner::new(fake, &StoreSpec::DFL_YZIX1) - .map(|i| core::str::from_utf8(i).unwrap()) - .collect(); - let refs_expect: Vec<&[u8]> = vec![ - b"4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys", - b"dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A", - b"FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI", - b"g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI", - b"H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I", - b"IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94", - b"IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU", - b"JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4", - b"LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw", - b"mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ", - b"nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA", - b"nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M", - b"UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY", - b"VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8", - b"VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc", - b"W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc", - b"xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k", - b"ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg", - ]; - let refs_expect: Vec<&str> = refs_expect - .into_iter() - .map(|i| core::str::from_utf8(i).unwrap()) - .collect(); - assert_eq!(refs, refs_expect); - } - - #[test] - fn just_store() { - for i in [&StoreSpec::DFL_NIX2, &StoreSpec::DFL_YZIX1] { - let refs: Vec<&[u8]> = StoreRefScanner::new(i.path_to_store.as_bytes(), i).collect(); - assert!(refs.is_empty()); - } - } -} diff --git a/users/zseri/store-ref-scanner/src/spec.rs b/users/zseri/store-ref-scanner/src/spec.rs deleted file mode 100644 index 79da0842c529..000000000000 --- a/users/zseri/store-ref-scanner/src/spec.rs +++ /dev/null @@ -1,40 +0,0 @@ -use crate::hbm::HalfBytesMask; - -pub struct StoreSpec<'path> { - /// path to store without trailing slash - pub path_to_store: &'path str, - - /// compressed map of allowed ASCII characters in hash part - pub valid_hashbytes: HalfBytesMask, - - /// compressed map of allowed ASCII characters in part after hash - pub valid_restbytes: HalfBytesMask, - - /// exact length of hash part of store paths - pub hashbytes_len: u8, -} - -impl StoreSpec<'_> { - pub(crate) fn check_rest(&self, rest: &[u8]) -> bool { - let hbl = self.hashbytes_len.into(); - rest.iter() - .take(hbl) - .take_while(|&&i| self.valid_hashbytes.contains(i)) - .count() - == hbl - } - - pub const DFL_NIX2: StoreSpec<'static> = StoreSpec { - path_to_store: "/nix/store", - valid_hashbytes: HalfBytesMask::B32_REVSHA256, - valid_restbytes: HalfBytesMask::DFL_REST, - hashbytes_len: 32, - }; - - pub const DFL_YZIX1: StoreSpec<'static> = StoreSpec { - path_to_store: "/yzixs", - valid_hashbytes: HalfBytesMask::B64_BLAKE2B256, - valid_restbytes: HalfBytesMask::DFL_REST, - hashbytes_len: 43, - }; -} |