about summary refs log tree commit diff
path: root/users/edef/refscan/src/main.rs
diff options
context:
space:
mode:
author edef <edef@edef.eu> 2023-01-09T20·12+0000
committer edef <edef@edef.eu> 2023-01-09T20·15+0000
commit 0b3c0725a28786c8d8f2bfc659e8f0a5beedb05a (patch)
tree 0d028b57aec6ef25be236992d0d06c9e4305a0c7 /users/edef/refscan/src/main.rs
parent 681800b438fa66f897759a197aba82f0122efcc3 (diff)
feat(users/edef/refscan): high-performance Nix reference scanner r/5636
Research-grade code, treat with care.

Change-Id: I99804df93e64101ef24928238ef0a8a02b59c2aa
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7686
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef/refscan/src/main.rs')
-rw-r--r-- users/edef/refscan/src/main.rs 55
1 files changed, 55 insertions, 0 deletions
diff --git a/users/edef/refscan/src/main.rs b/users/edef/refscan/src/main.rs
new file mode 100644
index 000000000000..9bbb5ed82312
--- /dev/null
+++ b/users/edef/refscan/src/main.rs
@@ -0,0 +1,55 @@
+use std::{
+    collections::BTreeSet as Set,
+    convert::TryInto,
+    io::{self, Read},
+    str,
+};
+
+fn main() { // Scans stdin for known 32-byte references and prints "<reference> <offset>" per hit.
+    let max_refs: Set<[u8; 32]> = include_str!("../testdata/maxrefs") // embedded at compile time
+        .lines()
+        .map(|l| l.as_bytes().try_into().unwrap()) // panics unless the line is exactly 32 bytes
+        .collect();
+    // Read all of stdin into memory up front; a read error panics via `unwrap`.
+    let input = {
+        let stdin = io::stdin();
+        let mut buffer = Vec::new();
+        stdin.lock().read_to_end(&mut buffer).unwrap();
+        buffer
+    };
+    // `base` is the buffer's start address; window pointers are turned into offsets against it.
+    let base = input.as_ptr() as usize;
+    let mut input: &[u8] = &input; // shadowed as a cursor slice that shrinks from the front
+    while input.len() >= 32 { // stop once fewer than 32 bytes remain
+        match refscan::scan_clean(&input) { // external helper; contract not visible in this file
+            Ok(buffer) | Err(buffer) => { // both variants are handled identically
+                let n = buffer.len();
+                input = &input[n..]; // skip as many bytes as the returned slice is long
+            }
+        }
+        // `buffer`: the leading run of bytes in a-z / 0-9 (all of `input` if no other byte occurs).
+        let buffer = {
+            let idx = input.iter().position(|x| match x { // index of the first byte outside a-z/0-9
+                b'a'..=b'z' | b'0'..=b'9' => false,
+                _ => true,
+            });
+            idx.map(|idx| &input[..idx]).unwrap_or(input)
+        };
+        // Check every 32-byte window of the run; a run shorter than 32 bytes yields no windows.
+        for chunk in buffer.windows(32) {
+            let offset = (chunk.as_ptr() as usize) - base; // byte offset of this window in stdin data
+            let chunk = { // copy into an owned array so it can be looked up in the set
+                let mut fixed = [0u8; 32];
+                fixed.copy_from_slice(chunk);
+                fixed
+            };
+            if max_refs.contains(&chunk) {
+                let seen = unsafe { str::from_utf8_unchecked(&chunk) }; // SAFETY: equals a set entry taken from a `&str` line, so valid UTF-8
+                println!("{} {}", seen, offset);
+            }
+        }
+        // NOTE(review): if both the skipped slice and this run are empty, `input` does not advance — confirm `scan_clean` guarantees progress.
+        let n = buffer.len();
+        input = &input[n..]; // advance past the run that was just scanned
+    }
+}