about summary refs log tree commit diff
path: root/users/edef/refscan/src/lib.rs
diff options
context:
space:
mode:
authoredef <edef@edef.eu>2023-01-09T20·12+0000
committeredef <edef@edef.eu>2023-01-09T20·15+0000
commit0b3c0725a28786c8d8f2bfc659e8f0a5beedb05a (patch)
tree0d028b57aec6ef25be236992d0d06c9e4305a0c7 /users/edef/refscan/src/lib.rs
parent681800b438fa66f897759a197aba82f0122efcc3 (diff)
feat(users/edef/refscan): high-performance Nix reference scanner r/5636
Research-grade code, treat with care.

Change-Id: I99804df93e64101ef24928238ef0a8a02b59c2aa
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7686
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef/refscan/src/lib.rs')
-rw-r--r--users/edef/refscan/src/lib.rs53
1 files changed, 53 insertions, 0 deletions
diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs
new file mode 100644
index 000000000000..79cf21a1b984
--- /dev/null
+++ b/users/edef/refscan/src/lib.rs
@@ -0,0 +1,53 @@
+use packed_simd::{m8x32, u8x32};
+
+fn prefilter(haystack: u8x32) -> m8x32 {
+    let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
+    let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
+    alp | num
+}
+
+/// scan_clean returns `Err(&buffer[..n])` of known pointer-free data,
+/// or `Ok(buffer)` if the entire buffer is pointer-free.
+pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> {
+    let buffer = {
+        let n = buffer.len() & !31;
+        &buffer[..n]
+    };
+
+    let mut masks = buffer
+        .chunks_exact(32)
+        .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)).bitmask())
+        .enumerate()
+        .map(|e| (e.0 * 32, e.1))
+        .peekable();
+
+    while let Some((offset, mask)) = masks.next() {
+        let peek = masks.peek().map(|x| x.1).unwrap_or(!0 >> 1);
+        let n = (!mask).leading_zeros() + (!peek).trailing_zeros();
+        if n >= 32 {
+            let offset = offset + mask.trailing_zeros() as usize;
+            return Err(&buffer[..offset]);
+        }
+    }
+
+    Ok(buffer)
+}
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn scan_tail() {
+        let buffer = b"_xfbmj7sl2ikicym9x3yq7cms5qx1w39k";
+        assert_eq!(crate::scan_clean(buffer), Err(&buffer[..1]));
+    }
+    #[test]
+    fn scan_straddle() {
+        let buffer = b"________________xfbmj7sl2ikicym9x3yq7cms5qx1w39k________________";
+        assert_eq!(crate::scan_clean(buffer), Err(&buffer[..16]));
+    }
+    #[test]
+    fn scan_clean() {
+        let buffer = b"x_______________xfbmj7sl2ikicym9x3yq-cms5qx1w3-k________________";
+        assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..]));
+    }
+}