about summary refs log tree commit diff
path: root/users
diff options
context:
space:
mode:
authoredef <edef@edef.eu>2023-01-09T23·16+0000
committeredef <edef@edef.eu>2023-01-09T23·21+0000
commitcb764e87de3378cf1d179019853c7fcf6c0ecc45 (patch)
tree204b7ab75e0fe471523f4f6d7000e659d9d6ce70 /users
parent0b3c0725a28786c8d8f2bfc659e8f0a5beedb05a (diff)
refactor(users/edef/refscan): switch to intrinsics r/5637
packed_simd is deprecated, but we don't need very much SIMD:
  * _mm256_set1_epi8 / vpbroadcastb (splat)
  * _mm256_cmpgt_epi8 / vpcmpgtb (comparison)
  * _mm256_movemask_epi8 / vpmovmskb (compress to bitmask)

This also simplifies the code by only vectorising the bare minimum,
since we just get a bitmask and operate in scalar mode as soon as
possible.

We don't need nightly Rust anymore: we're using only stable intrinsics.

Change-Id: Id410b5fef2549f3c97f48049f722f1e643e68553
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7687
Reviewed-by: edef <edef@edef.eu>
Tested-by: BuildkiteCI
Diffstat (limited to 'users')
-rw-r--r--users/edef/refscan/Cargo.lock20
-rw-r--r--users/edef/refscan/Cargo.toml3
-rw-r--r--users/edef/refscan/src/lib.rs43
3 files changed, 41 insertions, 25 deletions
diff --git a/users/edef/refscan/Cargo.lock b/users/edef/refscan/Cargo.lock
index 6a079249b3c0..a3515a75d720 100644
--- a/users/edef/refscan/Cargo.lock
+++ b/users/edef/refscan/Cargo.lock
@@ -1,25 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-[[package]]
-name = "cfg-if"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-
-[[package]]
-name = "packed_simd"
-version = "0.3.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-dependencies = [
- "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
-]
+version = 3
 
 [[package]]
 name = "refscan"
 version = "0.1.0"
-dependencies = [
- "packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
-[metadata]
-"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
-"checksum packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a85ea9fc0d4ac0deb6fe7911d38786b32fc11119afd9e9d38b84ff691ce64220"
diff --git a/users/edef/refscan/Cargo.toml b/users/edef/refscan/Cargo.toml
index 778d9d24edb4..45fa5a080422 100644
--- a/users/edef/refscan/Cargo.toml
+++ b/users/edef/refscan/Cargo.toml
@@ -5,6 +5,3 @@ authors = ["edef <edef@edef.eu>"]
 edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
-[dependencies]
-packed_simd = "0.3.3"
diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs
index 79cf21a1b984..a926bb052ac9 100644
--- a/users/edef/refscan/src/lib.rs
+++ b/users/edef/refscan/src/lib.rs
@@ -1,6 +1,6 @@
-use packed_simd::{m8x32, u8x32};
+use self::simd::u8x32;
 
-fn prefilter(haystack: u8x32) -> m8x32 {
+fn prefilter(haystack: u8x32) -> u32 {
     let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1));
     let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1));
     alp | num
@@ -16,7 +16,7 @@ pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> {
 
     let mut masks = buffer
         .chunks_exact(32)
-        .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)).bitmask())
+        .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)))
         .enumerate()
         .map(|e| (e.0 * 32, e.1))
         .peekable();
@@ -51,3 +51,40 @@ mod test {
         assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..]));
     }
 }
+
+mod simd {
+    #[cfg(target_arch = "x86")]
+    use std::arch::x86 as arch;
+    #[cfg(target_arch = "x86_64")]
+    use std::arch::x86_64 as arch;
+    use {
+        arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8},
+        std::ptr,
+    };
+
+    #[derive(Copy, Clone)]
+    pub struct u8x32(__m256i);
+
+    impl u8x32 {
+        #[inline(always)]
+        pub fn from_slice_unaligned(slice: &[u8]) -> Self {
+            assert_eq!(slice.len(), 32);
+            u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) })
+        }
+
+        #[inline(always)]
+        pub fn splat(x: u8) -> Self {
+            u8x32(unsafe { _mm256_set1_epi8(x as i8) })
+        }
+
+        #[inline(always)]
+        pub fn gt(self, b: Self) -> u32 {
+            unsafe { _mm256_movemask_epi8(_mm256_cmpgt_epi8(self.0, b.0)) as u32 }
+        }
+
+        #[inline(always)]
+        pub fn lt(self, b: Self) -> u32 {
+            b.gt(self)
+        }
+    }
+}