about summary refs log tree commit diff
path: root/users/edef/refscan/src/lib.rs
diff options
context:
space:
mode:
author edef <edef@edef.eu> 2023-01-11T18·57+0000
committer edef <edef@edef.eu> 2023-01-11T20·10+0000
commit ec470d254ffca9822cbfa3eb783a0c8f0f523f51 (patch)
tree ec4b76c824dcaf850b1d14019a0c5c842758fb78 /users/edef/refscan/src/lib.rs
parent 1afb4a9f44cbc27cfb5ddb9bb690739b38f0c73e (diff)
feat(users/edef/refscan): AArch64 support r/5646
Change-Id: I5062078739f0bf9f70c6789a9f2eafceff65d76e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7690
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef/refscan/src/lib.rs')
-rw-r--r-- users/edef/refscan/src/lib.rs 60
1 files changed, 60 insertions, 0 deletions
diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs
index fca4b290f1c4..3d4a07f3dd1c 100644
--- a/users/edef/refscan/src/lib.rs
+++ b/users/edef/refscan/src/lib.rs
@@ -55,6 +55,7 @@ mod test {
     }
 }
 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 mod simd {
     #[cfg(target_arch = "x86")]
     use std::arch::x86 as arch;
@@ -92,3 +93,62 @@ mod simd {
         }
     }
 }
+
+#[cfg(target_arch = "aarch64")] // compiled only on AArch64; the x86/x86_64 `simd` module above is gated separately
+mod simd { // defines `u8x32` on top of two 128-bit `uint8x16_t` vectors
+    use std::{
+        arch::aarch64::{
+            uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8,
+            vget_low_u8, vshlq_u8,
+        },
+        mem, ptr,
+    };
+    // A vector of 32 `u8` lanes stored as two 16-lane halves: element 0 holds lanes 0..16, element 1 holds lanes 16..32.
+    #[allow(non_camel_case_types)] // the type name `u8x32` is lowercase, not CamelCase
+    #[derive(Copy, Clone)]
+    #[repr(transparent)]
+    pub struct u8x32([u8x16; 2]);
+
+    impl u8x32 {
+        #[cfg(target_endian = "little")] // this constructor is only provided on little-endian targets
+        #[inline(always)]
+        pub fn from_slice_unaligned(slice: &[u8]) -> Self { // loads 32 bytes from `slice`; no alignment is required
+            assert_eq!(slice.len(), 32); // panics unless exactly 32 bytes are supplied; this check makes the read below sound
+            u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) // SAFETY: 32 readable bytes (checked above); unaligned read
+        }
+        // Returns a vector with all 32 lanes set to `x`.
+        #[inline(always)]
+        pub fn splat(x: u8) -> Self {
+            u8x32(unsafe {
+                let x = vdupq_n_u8(x); // broadcast `x` into all 16 lanes of one half
+                [x, x] // both halves are identical
+            })
+        }
+        // Lane-wise unsigned `self > b`, returned as a bitmask: bit i is set iff lane i of `self` is greater than lane i of `b`.
+        #[inline(always)]
+        pub fn gt(&self, b: Self) -> u32 {
+            let u8x32([al, ah]) = *self;
+            let u8x32([bl, bh]) = b;
+            // Computes the 16-bit comparison mask for one 16-lane half.
+            fn f(a: u8x16, b: u8x16) -> u32 {
+                unsafe {
+                    let c = vshlq_u8( // shifts each lane by its own signed count
+                        vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b)), // 0x80 in lanes where a > b, 0 elsewhere
+                        mem::transmute([ // i8 shift counts; a negative count shifts right
+                            -7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0i8, // lane k of each 8-lane group becomes 1 << k
+                        ]),
+                    );
+                    // Each 8-lane group now holds distinct bits, so its horizontal sum is that group's 8-bit mask.
+                    (vaddv_u8(vget_low_u8(c)) as u32) << 0 | (vaddv_u8(vget_high_u8(c)) as u32) << 8
+                }
+            }
+            // The low half fills bits 0..16 of the result, the high half fills bits 16..32.
+            f(al, bl) << 0 | f(ah, bh) << 16
+        }
+        // Lane-wise unsigned `self < b` as a bitmask, computed as `b > self`.
+        #[inline(always)]
+        pub fn lt(self, b: Self) -> u32 {
+            b.gt(self)
+        }
+    }
+}