diff options
Diffstat (limited to 'users/edef/refscan/src/lib.rs')
-rw-r--r-- | users/edef/refscan/src/lib.rs | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs new file mode 100644 index 000000000000..3d4a07f3dd1c --- /dev/null +++ b/users/edef/refscan/src/lib.rs @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: edef <edef@edef.eu> +// SPDX-License-Identifier: MPL-2.0 + +use self::simd::u8x32; + +fn prefilter(haystack: u8x32) -> u32 { + let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1)); + let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1)); + alp | num +} + +/// scan_clean returns `Err(&buffer[..n])` of known pointer-free data, +/// or `Ok(buffer)` if the entire buffer is pointer-free. +pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> { + let buffer = { + let n = buffer.len() & !31; + &buffer[..n] + }; + + let mut masks = buffer + .chunks_exact(32) + .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk))) + .enumerate() + .map(|e| (e.0 * 32, e.1)) + .peekable(); + + while let Some((offset, mask)) = masks.next() { + let peek = masks.peek().map(|x| x.1).unwrap_or(!0 >> 1); + let n = (!mask).leading_zeros() + (!peek).trailing_zeros(); + if n >= 32 { + let offset = offset + mask.trailing_zeros() as usize; + return Err(&buffer[..offset]); + } + } + + Ok(buffer) +} + +#[cfg(test)] +mod test { + #[test] + fn scan_tail() { + let buffer = b"_xfbmj7sl2ikicym9x3yq7cms5qx1w39k"; + assert_eq!(crate::scan_clean(buffer), Err(&buffer[..1])); + } + #[test] + fn scan_straddle() { + let buffer = b"________________xfbmj7sl2ikicym9x3yq7cms5qx1w39k________________"; + assert_eq!(crate::scan_clean(buffer), Err(&buffer[..16])); + } + #[test] + fn scan_clean() { + let buffer = b"x_______________xfbmj7sl2ikicym9x3yq-cms5qx1w3-k________________"; + assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..])); + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod simd { + #[cfg(target_arch = "x86")] + use std::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64 as arch; + use { + arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8}, + std::ptr, + }; + + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + pub struct u8x32(__m256i); + + impl u8x32 { + #[inline(always)] + pub fn from_slice_unaligned(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 32); + u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) + } + + #[inline(always)] + pub fn splat(x: u8) -> Self { + u8x32(unsafe { _mm256_set1_epi8(x as i8) }) + } + + #[inline(always)] + pub fn gt(self, b: Self) -> u32 { + unsafe { _mm256_movemask_epi8(_mm256_cmpgt_epi8(self.0, b.0)) as u32 } + } + + #[inline(always)] + pub fn lt(self, b: Self) -> u32 { + b.gt(self) + } + } +} + +#[cfg(target_arch = "aarch64")] +mod simd { + use std::{ + arch::aarch64::{ + uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8, + vget_low_u8, vshlq_u8, + }, + mem, ptr, + }; + + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(transparent)] + pub struct u8x32([u8x16; 2]); + + impl u8x32 { + #[cfg(target_endian = "little")] + #[inline(always)] + pub fn from_slice_unaligned(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 32); + u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) + } + + #[inline(always)] + pub fn splat(x: u8) -> Self { + u8x32(unsafe { + let x = vdupq_n_u8(x); + [x, x] + }) + } + + #[inline(always)] + pub fn gt(&self, b: Self) -> u32 { + let u8x32([al, ah]) = *self; + let u8x32([bl, bh]) = b; + + fn f(a: u8x16, b: u8x16) -> u32 { + unsafe { + let c = vshlq_u8( + vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b)), + mem::transmute([ + -7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0i8, + ]), + ); + + (vaddv_u8(vget_low_u8(c)) as u32) << 0 | (vaddv_u8(vget_high_u8(c)) as u32) << 8 + } + } + + f(al, bl) << 0 | f(ah, bh) << 16 + } + + #[inline(always)] + pub fn lt(self, b: Self) -> u32 { + b.gt(self) + } + } +} |