diff options
Diffstat (limited to 'users/edef/refscan')
-rw-r--r-- | users/edef/refscan/.gitignore | 5 | ||||
-rw-r--r-- | users/edef/refscan/Cargo.lock | 7 | ||||
-rw-r--r-- | users/edef/refscan/Cargo.lock.license | 3 | ||||
-rw-r--r-- | users/edef/refscan/Cargo.toml | 10 | ||||
-rw-r--r-- | users/edef/refscan/LICENSES/CC0-1.0.txt | 121 | ||||
-rw-r--r-- | users/edef/refscan/LICENSES/MPL-2.0.txt | 373 | ||||
-rw-r--r-- | users/edef/refscan/src/lib.rs | 154 | ||||
-rw-r--r-- | users/edef/refscan/src/main.rs | 58 | ||||
-rw-r--r-- | users/edef/refscan/testdata/.gitignore | 6 | ||||
-rwxr-xr-x | users/edef/refscan/testdata/generate.sh | 8 |
10 files changed, 745 insertions, 0 deletions
diff --git a/users/edef/refscan/.gitignore b/users/edef/refscan/.gitignore new file mode 100644 index 000000000000..ee4b088aee93 --- /dev/null +++ b/users/edef/refscan/.gitignore @@ -0,0 +1,5 @@ +# SPDX-FileCopyrightText: edef <edef@edef.eu> +# SPDX-License-Identifier: CC0-1.0 + +/target +**/*.rs.bk diff --git a/users/edef/refscan/Cargo.lock b/users/edef/refscan/Cargo.lock new file mode 100644 index 000000000000..a3515a75d720 --- /dev/null +++ b/users/edef/refscan/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "refscan" +version = "0.1.0" diff --git a/users/edef/refscan/Cargo.lock.license b/users/edef/refscan/Cargo.lock.license new file mode 100644 index 000000000000..2cd6276751c0 --- /dev/null +++ b/users/edef/refscan/Cargo.lock.license @@ -0,0 +1,3 @@ +SPDX-FileCopyrightText: edef <edef@edef.eu> +SPDX-License-Identifier: CC0-1.0 + diff --git a/users/edef/refscan/Cargo.toml b/users/edef/refscan/Cargo.toml new file mode 100644 index 000000000000..dfac9a899ec9 --- /dev/null +++ b/users/edef/refscan/Cargo.toml @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: edef <edef@edef.eu> +# SPDX-License-Identifier: MPL-2.0 + +[package] +name = "refscan" +version = "0.1.0" +authors = ["edef <edef@edef.eu>"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/users/edef/refscan/LICENSES/CC0-1.0.txt b/users/edef/refscan/LICENSES/CC0-1.0.txt new file mode 100644 index 000000000000..0e259d42c996 --- /dev/null +++ b/users/edef/refscan/LICENSES/CC0-1.0.txt @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/users/edef/refscan/LICENSES/MPL-2.0.txt b/users/edef/refscan/LICENSES/MPL-2.0.txt new file mode 100644 index 000000000000..ee6256cdb62a --- /dev/null +++ b/users/edef/refscan/LICENSES/MPL-2.0.txt @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/users/edef/refscan/src/lib.rs b/users/edef/refscan/src/lib.rs new file mode 100644 index 000000000000..3d4a07f3dd1c --- /dev/null +++ b/users/edef/refscan/src/lib.rs @@ -0,0 +1,154 @@ +// SPDX-FileCopyrightText: edef <edef@edef.eu> +// SPDX-License-Identifier: MPL-2.0 + +use self::simd::u8x32; + +fn prefilter(haystack: u8x32) -> u32 { + let alp = haystack.gt(u8x32::splat(b'a' - 1)) & haystack.lt(u8x32::splat(b'z' + 1)); + let num = haystack.gt(u8x32::splat(b'0' - 1)) & haystack.lt(u8x32::splat(b'9' + 1)); + alp | num +} + +/// scan_clean returns `Err(&buffer[..n])` of known pointer-free data, +/// or `Ok(buffer)` if the entire buffer is pointer-free. +pub fn scan_clean(buffer: &[u8]) -> Result<&[u8], &[u8]> { + let buffer = { + let n = buffer.len() & !31; + &buffer[..n] + }; + + let mut masks = buffer + .chunks_exact(32) + .map(|chunk| prefilter(u8x32::from_slice_unaligned(chunk))) + .enumerate() + .map(|e| (e.0 * 32, e.1)) + .peekable(); + + while let Some((offset, mask)) = masks.next() { + let peek = masks.peek().map(|x| x.1).unwrap_or(!0 >> 1); + let n = (!mask).leading_zeros() + (!peek).trailing_zeros(); + if n >= 32 { + let offset = offset + mask.trailing_zeros() as usize; + return Err(&buffer[..offset]); + } + } + + Ok(buffer) +} + +#[cfg(test)] +mod test { + #[test] + fn scan_tail() { + let buffer = b"_xfbmj7sl2ikicym9x3yq7cms5qx1w39k"; + assert_eq!(crate::scan_clean(buffer), Err(&buffer[..1])); + } + #[test] + fn scan_straddle() { + let buffer = b"________________xfbmj7sl2ikicym9x3yq7cms5qx1w39k________________"; + assert_eq!(crate::scan_clean(buffer), Err(&buffer[..16])); + } + #[test] + fn scan_clean() { + let buffer = b"x_______________xfbmj7sl2ikicym9x3yq-cms5qx1w3-k________________"; + assert_eq!(crate::scan_clean(buffer), Ok(&buffer[..])); + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +mod simd { + #[cfg(target_arch = "x86")] + use std::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64 as arch; + use { + arch::{__m256i, _mm256_cmpgt_epi8, _mm256_movemask_epi8, _mm256_set1_epi8}, + std::ptr, + }; + + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + pub struct u8x32(__m256i); + + impl u8x32 { + #[inline(always)] + pub fn from_slice_unaligned(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 32); + u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) + } + + #[inline(always)] + pub fn splat(x: u8) -> Self { + u8x32(unsafe { _mm256_set1_epi8(x as i8) }) + } + + #[inline(always)] + pub fn gt(self, b: Self) -> u32 { + unsafe { _mm256_movemask_epi8(_mm256_cmpgt_epi8(self.0, b.0)) as u32 } + } + + #[inline(always)] + pub fn lt(self, b: Self) -> u32 { + b.gt(self) + } + } +} + +#[cfg(target_arch = "aarch64")] +mod simd { + use std::{ + arch::aarch64::{ + uint8x16_t as u8x16, vaddv_u8, vandq_u8, vcgtq_u8, vdupq_n_u8, vget_high_u8, + vget_low_u8, vshlq_u8, + }, + mem, ptr, + }; + + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(transparent)] + pub struct u8x32([u8x16; 2]); + + impl u8x32 { + #[cfg(target_endian = "little")] + #[inline(always)] + pub fn from_slice_unaligned(slice: &[u8]) -> Self { + assert_eq!(slice.len(), 32); + u8x32(unsafe { ptr::read_unaligned(slice.as_ptr().cast()) }) + } + + #[inline(always)] + pub fn splat(x: u8) -> Self { + u8x32(unsafe { + let x = vdupq_n_u8(x); + [x, x] + }) + } + + #[inline(always)] + pub fn gt(&self, b: Self) -> u32 { + let u8x32([al, ah]) = *self; + let u8x32([bl, bh]) = b; + + fn f(a: u8x16, b: u8x16) -> u32 { + unsafe { + let c = vshlq_u8( + vandq_u8(vdupq_n_u8(0x80), vcgtq_u8(a, b)), + mem::transmute([ + -7, -6, -5, -4, -3, -2, -1, 0, -7, -6, -5, -4, -3, -2, -1, 0i8, + ]), + ); + + (vaddv_u8(vget_low_u8(c)) as u32) << 0 | (vaddv_u8(vget_high_u8(c)) as u32) << 8 + } + } + + f(al, bl) << 0 | f(ah, bh) << 16 + } + + #[inline(always)] + pub fn lt(self, b: Self) -> u32 { + b.gt(self) + } + } +} diff --git a/users/edef/refscan/src/main.rs b/users/edef/refscan/src/main.rs new file mode 100644 index 000000000000..e572abf0a1ee --- /dev/null +++ b/users/edef/refscan/src/main.rs @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: edef <edef@edef.eu> +// SPDX-License-Identifier: MPL-2.0 + +use std::{ + collections::BTreeSet as Set, + convert::TryInto, + io::{self, Read}, + str, +}; + +fn main() { + let max_refs: Set<[u8; 32]> = include_str!("../testdata/maxrefs") + .lines() + .map(|l| l.as_bytes().try_into().unwrap()) + .collect(); + + let input = { + let stdin = io::stdin(); + let mut buffer = Vec::new(); + stdin.lock().read_to_end(&mut buffer).unwrap(); + buffer + }; + + let base = input.as_ptr() as usize; + let mut input: &[u8] = &input; + while input.len() >= 32 { + match refscan::scan_clean(&input) { + Ok(buffer) | Err(buffer) => { + let n = buffer.len(); + input = &input[n..]; + } + } + + let buffer = { + let idx = input.iter().position(|x| match x { + b'a'..=b'z' | b'0'..=b'9' => false, + _ => true, + }); + idx.map(|idx| &input[..idx]).unwrap_or(input) + }; + + for chunk in buffer.windows(32) { + let offset = (chunk.as_ptr() as usize) - base; + let chunk = { + let mut fixed = [0u8; 32]; + fixed.copy_from_slice(chunk); + fixed + }; + if max_refs.contains(&chunk) { + let seen = unsafe { str::from_utf8_unchecked(&chunk) }; + println!("{} {}", seen, offset); + } + } + + let n = buffer.len(); + input = &input[n..]; + } +} diff --git a/users/edef/refscan/testdata/.gitignore b/users/edef/refscan/testdata/.gitignore new file mode 100644 index 000000000000..1d278bd6ce97 --- /dev/null +++ b/users/edef/refscan/testdata/.gitignore @@ -0,0 +1,6 @@ +# SPDX-FileCopyrightText: edef <edef@edef.eu> +# SPDX-License-Identifier: CC0-1.0 + +/maxrefs +/nar +/result diff --git a/users/edef/refscan/testdata/generate.sh b/users/edef/refscan/testdata/generate.sh new file mode 100755 index 000000000000..9f416024c181 --- /dev/null +++ b/users/edef/refscan/testdata/generate.sh @@ -0,0 +1,8 @@ +#! /usr/bin/env bash +# SPDX-FileCopyrightText: edef <edef@edef.eu> +# SPDX-License-Identifier: CC0-1.0 +set -euo pipefail + +drv=$(nix-instantiate '<nixpkgs>' -A ghc) +nix --extra-experimental-features nix-command show-derivation -r "$drv" | jq -r '.[] | .outputs[].path, .inputSrcs[]' | sort -u | cut -d/ -f4 | cut -d- -f1 > maxrefs +nix-store --dump "$(nix-build "$drv")" > nar |