about summary refs log tree commit diff
path: root/users/fogti/store-ref-scanner/src
diff options
context:
space:
mode:
authorAlain Zscheile <fogti+devel@ytrizja.de>2023-07-02T16·43+0200
committerlukegb <lukegb@tvl.fyi>2023-07-07T20·06+0000
commit56c776d9e9175e135ed2cb7043685fc193be5662 (patch)
tree3319a185127cf87ebe4ddb2d7dde80da23f0f066 /users/fogti/store-ref-scanner/src
parente751372f2f2ffbb1d32e9729e3c83c2c12c29ea3 (diff)
fix(users): rename zseri -> fogti r/6396
in accordance with similar renaming on other sites
(e.g. GitHub, Exozyme, chaos.social)

My experience with exozyme tells me that fully applying
this change might require manual editing of Gerrit's database
anyways to fix broken references/patch ownerships.

Change-Id: I024ff264c09b25d8f854c489d93458d1fce7e9f4
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8919
Autosubmit: lukegb <lukegb@tvl.fyi>
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: zseri <zseri.devel@ytrizja.de>
Diffstat (limited to 'users/fogti/store-ref-scanner/src')
-rw-r--r--users/fogti/store-ref-scanner/src/hbm.rs167
-rw-r--r--users/fogti/store-ref-scanner/src/lib.rs215
-rw-r--r--users/fogti/store-ref-scanner/src/spec.rs40
3 files changed, 422 insertions, 0 deletions
diff --git a/users/fogti/store-ref-scanner/src/hbm.rs b/users/fogti/store-ref-scanner/src/hbm.rs
new file mode 100644
index 000000000000..2520efd8363d
--- /dev/null
+++ b/users/fogti/store-ref-scanner/src/hbm.rs
@@ -0,0 +1,167 @@
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct HalfBytesMask(pub [u8; 16]);
+
+#[allow(clippy::as_conversions, clippy::zero_prefixed_literal)]
+impl HalfBytesMask {
+    pub const B32_REVSHA256: HalfBytesMask =
+        HalfBytesMask([0, 0, 0, 0, 0, 0, 255, 3, 0, 0, 0, 0, 222, 127, 207, 7]);
+
+    pub const B64_BLAKE2B256: HalfBytesMask = HalfBytesMask([
+        0, 0, 0, 0, 0, 8, 255, 3, 254, 255, 255, 135, 254, 255, 255, 7,
+    ]);
+
+    pub const DFL_REST: HalfBytesMask = HalfBytesMask([
+        0, 0, 0, 0, 0, 104, 255, 163, 254, 255, 255, 135, 254, 255, 255, 7,
+    ]);
+
+    #[inline]
+    pub const fn from_expanded(x: [bool; 128]) -> Self {
+        let mut ret = [0u8; 16];
+        let mut idx = 0;
+        while idx < 16 {
+            let fin = idx * 8;
+            let mut idx2 = 0;
+            while idx2 < 8 {
+                if x[fin + idx2] {
+                    ret[idx] += (1 << idx2) as u8;
+                }
+                idx2 += 1;
+            }
+            idx += 1;
+        }
+        Self(ret)
+    }
+
+    /// create a mask by allowing all characters via the mask which are included in the given string
+    pub fn from_bytes(s: &[u8]) -> Self {
+        s.iter().fold(Self([0u8; 16]), |mut ret, &i| {
+            ret.set(i, true);
+            ret
+        })
+    }
+
+    pub const fn into_expanded(self) -> [bool; 128] {
+        let Self(ihbm) = self;
+        let mut ret = [false; 128];
+        let mut idx = 0;
+        while idx < 16 {
+            let fin = idx * 8;
+            let curi = ihbm[idx];
+            let mut idx2 = 0;
+            while idx2 < 8 {
+                ret[fin + idx2] = (curi >> idx2) & 0b1 != 0;
+                idx2 += 1;
+            }
+            idx += 1;
+        }
+        ret
+    }
+
+    pub fn contains(&self, byte: u8) -> bool {
+        if byte >= 0x80 {
+            false
+        } else {
+            (self.0[usize::from(byte / 8)] >> u32::from(byte % 8)) & 0b1 != 0
+        }
+    }
+
+    pub fn set(&mut self, byte: u8, allow: bool) {
+        if byte >= 0x80 {
+            if cfg!(debug_assertions) {
+                panic!(
+                    "tried to manipulate invalid byte {:?} in HalfBytesMask",
+                    byte
+                );
+            } else {
+                return;
+            }
+        }
+        let block = &mut self.0[usize::from(byte / 8)];
+        let bitpat = (1 << u32::from(byte % 8)) as u8;
+        if allow {
+            *block |= bitpat;
+        } else {
+            *block &= !bitpat;
+        }
+    }
+
+    #[cfg(test)]
+    fn count_ones(&self) -> u8 {
+        self.0
+            .iter()
+            .map(|i| i.count_ones())
+            .sum::<u32>()
+            .try_into()
+            .unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn maskbase() {
+        assert_eq!(HalfBytesMask::B32_REVSHA256.count_ones(), 32);
+        assert_eq!(HalfBytesMask::B64_BLAKE2B256.count_ones(), 64);
+    }
+
+    #[test]
+    fn non_ascii() {
+        for i in 0x80..=0xff {
+            assert!(!HalfBytesMask::DFL_REST.contains(i));
+        }
+    }
+
+    #[test]
+    fn dflmask() {
+        assert_eq!(
+            HalfBytesMask::from_expanded(
+                [
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                ]
+                .map(|i| i != 0)
+            ),
+            Default::default(),
+        );
+
+        assert_eq!(
+            HalfBytesMask::from_expanded(
+                [
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+                ]
+                .map(|i| i != 0)
+            ),
+            HalfBytesMask::B32_REVSHA256,
+        );
+
+        assert_eq!(
+            HalfBytesMask::from_expanded(
+                [
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
+                ]
+                .map(|i| i != 0)
+            ),
+            HalfBytesMask::B64_BLAKE2B256,
+        );
+
+        assert_eq!(
+            HalfBytesMask::from_bytes(
+                b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-._?="
+            ),
+            HalfBytesMask::DFL_REST,
+        );
+    }
+}
diff --git a/users/fogti/store-ref-scanner/src/lib.rs b/users/fogti/store-ref-scanner/src/lib.rs
new file mode 100644
index 000000000000..0f86a769fe63
--- /dev/null
+++ b/users/fogti/store-ref-scanner/src/lib.rs
@@ -0,0 +1,215 @@
+#![no_std]
+#![forbid(clippy::cast_ptr_alignment, trivial_casts, unconditional_recursion)]
+#![deny(clippy::as_conversions)]
+
+mod hbm;
+pub use hbm::HalfBytesMask;
+
+mod spec;
+pub use spec::*;
+
+/// limit maximal length of store basename
+const BASENAME_MAXLEN: usize = 255;
+
+/// this is a trait which implements the interface of possible inputs
+/// (usually byte slices)
+pub trait ScannerInput: AsRef<[u8]> + Sized {
+    /// Splits the input into two at the given index.
+    /// Afterwards self contains elements [at, len), and the returned input part contains elements [0, at).
+    fn split_to(&mut self, at: usize) -> Self;
+    fn finish(&mut self);
+}
+
+impl ScannerInput for &[u8] {
+    fn split_to(&mut self, at: usize) -> Self {
+        let (a, b) = self.split_at(at);
+        *self = b;
+        a
+    }
+
+    fn finish(&mut self) {
+        *self = &[];
+    }
+}
+
+impl ScannerInput for &mut [u8] {
+    fn split_to(&mut self, at: usize) -> Self {
+        // Lifetime dance taken from `impl Write for &mut [u8]`.
+        // Taken from crate `std`.
+        let (a, b) = core::mem::take(self).split_at_mut(at);
+        *self = b;
+        a
+    }
+
+    fn finish(&mut self) {
+        *self = &mut [];
+    }
+}
+
+/// this is the primary structure of this crate
+///
+/// it represents a scanner which scans binary slices for store references,
+/// and implements an iterator interface which returns these as byte slices.
+pub struct StoreRefScanner<'x, Input: 'x> {
+    input: Input,
+    spec: &'x StoreSpec<'x>,
+}
+
+impl<'x, Input> StoreRefScanner<'x, Input>
+where
+    Input: ScannerInput + 'x,
+{
+    pub fn new(input: Input, spec: &'x StoreSpec<'x>) -> Self {
+        for i in [&spec.valid_hashbytes, &spec.valid_restbytes] {
+            for j in [b'\0', b' ', b'\t', b'\n', b'/', b'\\'] {
+                assert!(!i.contains(j));
+            }
+        }
+        Self { input, spec }
+    }
+}
+
+impl<'x, Input: 'x> Iterator for StoreRefScanner<'x, Input>
+where
+    Input: ScannerInput + 'x,
+{
+    type Item = Input;
+
+    fn next(&mut self) -> Option<Input> {
+        let hbl: usize = self.spec.hashbytes_len.into();
+        'outer: while !self.input.as_ref().is_empty() {
+            if !self.spec.path_to_store.is_empty() {
+                let p2sas = self.spec.path_to_store;
+                while !self.input.as_ref().starts_with(p2sas.as_bytes()) {
+                    if self.input.as_ref().is_empty() {
+                        break 'outer;
+                    }
+                    self.input.split_to(1);
+                }
+                self.input.split_to(p2sas.len());
+                if self.input.as_ref().is_empty() {
+                    break 'outer;
+                }
+            }
+            let hsep = matches!(self.input.as_ref().iter().next(), Some(b'/') | Some(b'\\'));
+            self.input.split_to(1);
+            if hsep && self.spec.check_rest(self.input.as_ref()) {
+                // we have found a valid hash
+                // rest contains the store basename and all following components
+                // now let's search for the end
+                // and then cut off possible following components after the basename
+                let rlen = self
+                    .input
+                    .as_ref()
+                    .iter()
+                    .enumerate()
+                    .take(BASENAME_MAXLEN)
+                    .skip(hbl)
+                    .find(|&(_, &i)| !self.spec.valid_restbytes.contains(i))
+                    .map(|(eosp, _)| eosp)
+                    .unwrap_or_else(|| core::cmp::min(BASENAME_MAXLEN, self.input.as_ref().len()));
+                return Some(self.input.split_to(rlen));
+            }
+        }
+        self.input.finish();
+        None
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    extern crate alloc;
+    use alloc::{vec, vec::Vec};
+
+    #[test]
+    fn simple_nix2() {
+        let drv: &[u8] = br#"
+            Derive([("out","","r:sha256","")],[("/nix/store/2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",["dev"]),("/nix/store/6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",["out"]),("/nix/store/fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",["out"]),("/nix/store/s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",["out"]),("/nix/store/xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",["out"])],["/nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs","/0sdk1r4l43yw4g6lmqdhd92vhdfhlwz3m76jxzvzsqsv63czw2km"),("builder","/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/bash"),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck",""),("doInstallCheck",""),("makeFlags","PREFIX=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9 UDEVLIBDIR=/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9/lib/udev/"),("name","bcache-tools-1.0.7"),("nativeBuildInputs","/1kw0rwgdyq9q69wmmsa5d2kap6p52b0yldbzi4w17bhcq5g5cp2f"),("out","/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("outputs","out"),("patches","/nix/store/2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch /nix/store/03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch"),("pname","bcache-tools"),("preBuild","sed -e \"s|/bin/sh|/0g15yibzzi3rmw29gqlbms05x9dbghbvh61v1qggydvmzh3bginw/bin/sh|\" -i *.rules\n"),("preInstall","mkdir -p \"$out/sbin\" \"$out/lib/udev/rules.d\" \"$out/share/man/man8\"\n"),("prePatch","sed -e 
\"/INSTALL.*initramfs\\/hook/d\" \\\n    -e \"/INSTALL.*initcpio\\/install/d\" \\\n    -e \"/INSTALL.*dracut\\/module-setup.sh/d\" \\\n    -e \"s/pkg-config/$PKG_CONFIG/\" \\\n    -i Makefile\n"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/6izcafvfcbz19chi7hl20834g0fa043n-source"),("stdenv","/01ncyv8bxibj0imgfvmxgqy648n697bachil6aw6i46g1jk0bbds"),("strictDeps",""),("system","x86_64-linux"),("version","1.0.7")])
+        "#;
+        // we convert everything into strings because it is way easier to compare elements in error messages
+        let refs: Vec<&str> = StoreRefScanner::new(drv, &StoreSpec::DFL_NIX2)
+            .map(|i| core::str::from_utf8(i).unwrap())
+            .collect();
+        let refs_expect: Vec<&[u8]> = vec![
+            b"2ax7bvjdfkzim69q957i0jlg0nvmapg0-util-linux-2.37.2.drv",
+            b"6b55ssmh8pzqsc4q4kw1yl3kqvr4fvqj-bash-5.1-p12.drv",
+            b"fp2vx24kczlzv84avds28wyzsmrn8kyv-source.drv",
+            b"s6c2lm5hpsvdwnxq9y1g3ngncghjzc3k-stdenv-linux.drv",
+            b"xlnzpf4mzghi8vl0krabrgcbnqk5qjf3-pkg-config-wrapper-0.29.2.drv",
+            b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch",
+            b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch",
+            b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh",
+            b"9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh",
+            b"2q3z7587yhlz0i2xvfvvap42zk5carlv-bcache-udev-modern.patch",
+            b"03sl46khd8gmjpsad7223m32ma965vy9-fix-static.patch",
+            b"6izcafvfcbz19chi7hl20834g0fa043n-source",
+        ];
+        let refs_expect: Vec<&str> = refs_expect
+            .into_iter()
+            .map(|i| core::str::from_utf8(i).unwrap())
+            .collect();
+        assert_eq!(refs, refs_expect);
+    }
+
+    #[test]
+    fn simple_yzix1() {
+        // I haven't yet produced any yzix derivation which included /yzixs absolute paths...
+        let fake: &[u8] = br#"
+            /yzixs/4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys:         ASCII text
+            /yzixs/dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A:         ASCII text
+            /yzixs/FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI:         unified diff output, ASCII text
+            /yzixs/g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI:         unified diff output, ASCII text
+            /yzixs/H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I:         ASCII text
+            /yzixs/IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94:         unified diff output, ASCII text
+            /yzixs/IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU:         POSIX shell script, ASCII text executable
+            /yzixs/JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4:         unified diff output, ASCII text
+            /yzixs/LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw:         unified diff output, ASCII text
+            /yzixs/mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ:         ASCII text
+            /yzixs/nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA:         POSIX shell script, ASCII text executable
+            /yzixs/nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M:         unified diff output, ASCII text
+            /yzixs/UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY:         unified diff output, ASCII text
+            /yzixs/VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8:         unified diff output, ASCII text
+            /yzixs/VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc:         unified diff output, ASCII text
+            /yzixs/W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc:         unified diff output, ASCII text
+            /yzixs/xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k:         unified diff output, ASCII text
+            /yzixs/ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg:         ASCII text
+        "#;
+        let refs: Vec<&str> = StoreRefScanner::new(fake, &StoreSpec::DFL_YZIX1)
+            .map(|i| core::str::from_utf8(i).unwrap())
+            .collect();
+        let refs_expect: Vec<&[u8]> = vec![
+            b"4Zx1PBoft1YyAuKdhjAY1seZFHloxQ+8voHQRkRMuys",
+            b"dNE3yogD4JHKHzNa2t3jQMZddT8wjqlMDB0naDIFo0A",
+            b"FMluSVOHLc4bxX7F4lBCXafNljBnDn+rAM5HzG7k8LI",
+            b"g2G3GRL87hGEdw9cq2BZWqDQP_HeHSPRLbJ9P9KH+HI",
+            b"H08Av1ZAONwFdzVLpFQm0Sc0dvyk0sbnk82waoBig7I",
+            b"IndARQp+gaGDLS3K+PeyXdaRqAcCyS3EIbRXkkYjC94",
+            b"IrLPnbkEolTAuWRxkXpuvVs6Imb1iB6wUJcI+fxWwkU",
+            b"JsS_H3n3TSh2R6fiIzgOPZdjSmRkV71vGxstJJKPmr4",
+            b"LZ6pQh1x8DRxZ2IYzetBRS4LuE__IXFjpOfQPxHVwpw",
+            b"mEi2RPep9daRs0JUvwt1JsDfgYSph5sH_+_ihwn8IGQ",
+            b"nd4DyljinP3auDMHL_LrpsRJkWQpSHQK2jqtyyzWcBA",
+            b"nzpaknF0_ONSHtd0i_e1E3pkLF1QPeJQhAB7x9Ogo_M",
+            b"UZ3uzVUUMC1gKGLw6tg_aLFwoFrJedXB3xbhEgQOaiY",
+            b"VKyXxKTXsDGxYJ24YgbvCc1bZkA5twp3TC+Gbi4Kwd8",
+            b"VPJMl8O1xkc1LsJznpoQrCrQO0Iy+ODCPsgoUBLiRZc",
+            b"W6r1ow001ASHRj+gtRfyj9Fb_gCO_pBztX8WhYXVdIc",
+            b"xvwEcXIob_rQynUEtQiQbwaDXEobTVKEGaBMir9oH9k",
+            b"ZPvQbRJrtyeSITvW3FUZvw99hhNOO3CFqGgmWgScxcg",
+        ];
+        let refs_expect: Vec<&str> = refs_expect
+            .into_iter()
+            .map(|i| core::str::from_utf8(i).unwrap())
+            .collect();
+        assert_eq!(refs, refs_expect);
+    }
+
+    #[test]
+    fn just_store() {
+        for i in [&StoreSpec::DFL_NIX2, &StoreSpec::DFL_YZIX1] {
+            let refs: Vec<&[u8]> = StoreRefScanner::new(i.path_to_store.as_bytes(), i).collect();
+            assert!(refs.is_empty());
+        }
+    }
+}
diff --git a/users/fogti/store-ref-scanner/src/spec.rs b/users/fogti/store-ref-scanner/src/spec.rs
new file mode 100644
index 000000000000..79da0842c529
--- /dev/null
+++ b/users/fogti/store-ref-scanner/src/spec.rs
@@ -0,0 +1,40 @@
+use crate::hbm::HalfBytesMask;
+
+pub struct StoreSpec<'path> {
+    /// path to store without trailing slash
+    pub path_to_store: &'path str,
+
+    /// compressed map of allowed ASCII characters in hash part
+    pub valid_hashbytes: HalfBytesMask,
+
+    /// compressed map of allowed ASCII characters in part after hash
+    pub valid_restbytes: HalfBytesMask,
+
+    /// exact length of hash part of store paths
+    pub hashbytes_len: u8,
+}
+
+impl StoreSpec<'_> {
+    pub(crate) fn check_rest(&self, rest: &[u8]) -> bool {
+        let hbl = self.hashbytes_len.into();
+        rest.iter()
+            .take(hbl)
+            .take_while(|&&i| self.valid_hashbytes.contains(i))
+            .count()
+            == hbl
+    }
+
+    pub const DFL_NIX2: StoreSpec<'static> = StoreSpec {
+        path_to_store: "/nix/store",
+        valid_hashbytes: HalfBytesMask::B32_REVSHA256,
+        valid_restbytes: HalfBytesMask::DFL_REST,
+        hashbytes_len: 32,
+    };
+
+    pub const DFL_YZIX1: StoreSpec<'static> = StoreSpec {
+        path_to_store: "/yzixs",
+        valid_hashbytes: HalfBytesMask::B64_BLAKE2B256,
+        valid_restbytes: HalfBytesMask::DFL_REST,
+        hashbytes_len: 43,
+    };
+}