From cb764e87de3378cf1d179019853c7fcf6c0ecc45 Mon Sep 17 00:00:00 2001 From: edef Date: Mon, 9 Jan 2023 23:16:21 +0000 Subject: refactor(users/edef/refscan): switch to intrinsics packed_simd is deprecated, but we don't need very much SIMD: * _mm256_set1_epi8 / vpbroadcastb (splat) * _mm256_cmpgt_epi8 / vpcmpgtb (comparison) * _mm256_movemask_epi8 / vpmovmskb (compress to bitmask) This also simplifies the code by only vectorising the bare minimum, since we just get a bitmask and operate in scalar mode as soon as possible. We don't need nightly Rust anymore: we're using only stable intrinsics. Change-Id: Id410b5fef2549f3c97f48049f722f1e643e68553 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7687 Reviewed-by: edef Tested-by: BuildkiteCI --- users/edef/refscan/Cargo.lock | 20 +------------------- users/edef/refscan/Cargo.toml | 3 --- users/edef/refscan/src/lib.rs | 43 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/users/edef/refscan/Cargo.lock b/users/edef/refscan/Cargo.lock index 6a079249b3c0..a3515a75d720 100644 --- a/users/edef/refscan/Cargo.lock +++ b/users/edef/refscan/Cargo.lock @@ -1,25 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -[[package]] -name = "cfg-if" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "packed_simd" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", -] +version = 3 [[package]] name = "refscan" version = "0.1.0" -dependencies = [ - "packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[metadata] -"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" -"checksum packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a85ea9fc0d4ac0deb6fe7911d38786b32fc11119afd9e9d38b84ff691ce64220" diff --git a/users/edef/refscan/Cargo.toml b/users/edef/refscan/Cargo.toml index 778d9d24edb4..45fa5a080422 100644 --- a/users/edef/refscan/Cargo.toml +++ b/users/edef/refscan/Cargo.toml @@ -5,6 +5,3 @@ authors = ["edef "] edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -packed_simd = "0.3.3" diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs index 79cf21a1b984..a926bb052ac9 100644 --- a/users/edef/refscan/src/lib.rs +++ b/users/edef/refscan/src/lib.rs @@ -1,6 +1,6 @@ -use packed_simd::{m8x32, u8x32}; +use self::simd::u8x32; -fn prefilter(haystack: u8x32) -> m8x32 { +fn prefilter(haystack: u8x32) -> u32 { let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1)); let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1)); alp | num @@ -16,7 +16,7 @@ pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> { let mut masks = buffer .chunks_exact(32) - .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk)).bitmask()) + .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk))) .enumerate() .map(|e| (e.0 * 32, e.1)) .peekable(); @@ -51,3 +51,40 @@ mod test { assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..])); } } + +mod simd { + #[cfg(target_arch = "x86")] + use std::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64 as arch; + use { + arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8}, + std::ptr, + }; + + #[derive(Copy, Clone)] + pub struct u8x32(__m256i); + + impl u8x32 { + #[inline(always)] + pub fn from_slice_unaligned(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 32); + u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) + } + + #[inline(always)] + pub fn splat(x: u8) -> Self { + u8x32(unsafe { _mm256_set1_epi8(x as i8) }) + } + + #[inline(always)] + pub fn gt(self, b: Self) -> u32 { + unsafe { _mm256_movemask_epi8(_mm256_cmpgt_epi8(self.0, b.0)) as u32 } + } + + #[inline(always)] + pub fn lt(self, b: Self) -> u32 { + b.gt(self) + } + } +} -- cgit 1.4.1