From b38be028d96ce107439f3323026270228a871a13 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 25 Jan 2024 14:47:07 +0200 Subject: feat(tvix/tools/crunch-v2): add CLI args Use clap derive to make the input and output files configurable, as well as the chunk size parameters. Change-Id: I02b29126f3bd2c13ba2c6e7e0aa4ff048ff803ed Reviewed-on: https://cl.tvl.fyi/c/depot/+/10691 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: edef --- tvix/tools/crunch-v2/Cargo.lock | 173 +++++++++ tvix/tools/crunch-v2/Cargo.nix | 631 +++++++++++++++++++++++++++++++- tvix/tools/crunch-v2/Cargo.toml | 1 + tvix/tools/crunch-v2/src/bin/extract.rs | 34 +- tvix/tools/crunch-v2/src/lib.rs | 7 - tvix/tools/crunch-v2/src/main.rs | 71 +++- 6 files changed, 884 insertions(+), 33 deletions(-) diff --git a/tvix/tools/crunch-v2/Cargo.lock b/tvix/tools/crunch-v2/Cargo.lock index 617b4f21cdd7..cff5509d0b06 100644 --- a/tvix/tools/crunch-v2/Cargo.lock +++ b/tvix/tools/crunch-v2/Cargo.lock @@ -75,6 +75,54 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e2e1ebcb11de5c03c67de28a7df593d32191b44939c482e97702baaaa6ab6a5" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" version = "1.0.75" @@ -371,6 +419,52 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "clap" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e578d6ec4194633722ccf9544794b71b1385c3c027efe0c55db226fc880865c" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4df4df40ec50c46000231c914968278b1eb05098cf8f1b3a518a95030e71d1c7" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "console" version = "0.15.7" @@ -492,6 +586,7 @@ dependencies = [ "bstr", "bytes", "bzip2", + "clap", "digest 0.10.7", "fastcdc", "futures", @@ -2485,6 +2580,12 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe895eb47f22e2ddd4dabc02bce419d2e643c8e3b585c78158b349195bc24d82" +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + [[package]] name = "strum_macros" version = "0.25.3" @@ -2695,6 +2796,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + [[package]] name = "version_check" version = "0.9.4" @@ -2851,6 +2958,15 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-targets" version = "0.42.2" @@ -2881,6 +2997,21 @@ dependencies = [ "windows_x86_64_msvc 0.48.5", ] +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -2893,6 +3024,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + [[package]] name = "windows_aarch64_msvc" version = "0.42.2" @@ -2905,6 +3042,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + [[package]] name = "windows_i686_gnu" version = "0.42.2" @@ -2917,6 +3060,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + [[package]] name = "windows_i686_msvc" version = "0.42.2" @@ -2929,6 +3078,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + [[package]] name = "windows_x86_64_gnu" version = "0.42.2" @@ -2941,6 +3096,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" @@ -2953,6 +3114,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + [[package]] name = "windows_x86_64_msvc" version = "0.42.2" @@ -2965,6 +3132,12 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + [[package]] name = "xml-rs" version = "0.8.19" diff --git a/tvix/tools/crunch-v2/Cargo.nix b/tvix/tools/crunch-v2/Cargo.nix index 3fe5b155c90b..35c09ecee711 100644 --- a/tvix/tools/crunch-v2/Cargo.nix +++ b/tvix/tools/crunch-v2/Cargo.nix @@ -1,5 +1,5 @@ # This file was @generated by crate2nix 0.13.0 with the command: -# "generate" +# "generate" "--all-features" # See https://github.com/kolloch/crate2nix for more info. { nixpkgs ? @@ -267,6 +267,110 @@ rec { } ]; + }; + "anstream" = rec { + crateName = "anstream"; + version = "0.6.11"; + edition = "2021"; + sha256 = "19dndamalavhjwp4i74k8hdijcixb7gsfa6ycwyc1r8xn6y1wbkf"; + dependencies = [ + { + name = "anstyle"; + packageId = "anstyle"; + } + { + name = "anstyle-parse"; + packageId = "anstyle-parse"; + } + { + name = "anstyle-query"; + packageId = "anstyle-query"; + optional = true; + } + { + name = "anstyle-wincon"; + packageId = "anstyle-wincon"; + optional = true; + target = { target, features }: (target."windows" or false); + } + { + name = "colorchoice"; + packageId = "colorchoice"; + } + { + name = "utf8parse"; + packageId = "utf8parse"; + } + ]; + features = { + "auto" = [ "dep:anstyle-query" ]; + "default" = [ "auto" "wincon" ]; + "wincon" = [ "dep:anstyle-wincon" ]; + }; + resolvedDefaultFeatures = [ "auto" "default" "wincon" ]; + }; + "anstyle" = rec { + crateName = "anstyle"; + version = "1.0.4"; + edition = "2021"; + sha256 = "11yxw02b6parn29s757z96rgiqbn8qy0fk9a3p3bhczm85dhfybh"; + features = { + "default" = [ "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; + "anstyle-parse" = rec { + crateName = "anstyle-parse"; + version = "0.2.3"; + edition = "2021"; + sha256 = "134jhzrz89labrdwxxnjxqjdg06qvaflj1wkfnmyapwyldfwcnn7"; + dependencies = [ + { + name = "utf8parse"; + packageId = "utf8parse"; + optional = true; + } + ]; + features = { + "core" = [ "dep:arrayvec" ]; + "default" = [ "utf8" ]; + "utf8" = [ "dep:utf8parse" ]; + }; + resolvedDefaultFeatures = [ "default" "utf8" ]; + }; + "anstyle-query" = rec { + crateName = "anstyle-query"; + version = "1.0.2"; + edition = "2021"; + sha256 = "0j3na4b1nma39g4x7cwvj009awxckjf3z2vkwhldgka44hqj72g2"; + dependencies = [ + { + name = "windows-sys"; + packageId = "windows-sys 0.52.0"; + target = { target, features }: (target."windows" or false); + features = [ "Win32_System_Console" "Win32_Foundation" ]; + } + ]; + + }; + "anstyle-wincon" = rec { + crateName = "anstyle-wincon"; + version = "3.0.2"; + edition = "2021"; + sha256 = "19v0fv400bmp4niqpzxnhg83vz12mmqv7l2l8vi80qcdxj0lpm8w"; + dependencies = [ + { + name = "anstyle"; + packageId = "anstyle"; + } + { + name = "windows-sys"; + packageId = "windows-sys 0.52.0"; + target = { target, features }: (target."windows" or false); + features = [ "Win32_System_Console" "Win32_Foundation" ]; + } + ]; + }; "anyhow" = rec { crateName = "anyhow"; @@ -1059,6 +1163,130 @@ rec { }; resolvedDefaultFeatures = [ "android-tzdata" "clock" "iana-time-zone" "serde" "std" "winapi" "windows-targets" ]; }; + "clap" = rec { + crateName = "clap"; + version = "4.4.18"; + edition = "2021"; + crateBin = [ ]; + sha256 = "0p46h346y8nval6gwzh27if3icbi9dwl95fg5ir36ihrqip8smqy"; + dependencies = [ + { + name = "clap_builder"; + packageId = "clap_builder"; + usesDefaultFeatures = false; + } + { + name = "clap_derive"; + packageId = "clap_derive"; + optional = true; + } + ]; + features = { + "cargo" = [ "clap_builder/cargo" ]; + "color" = [ "clap_builder/color" ]; + "debug" = [ "clap_builder/debug" "clap_derive?/debug" ]; + "default" = [ "std" "color" "help" "usage" "error-context" "suggestions" ]; + "deprecated" = [ "clap_builder/deprecated" "clap_derive?/deprecated" ]; + "derive" = [ "dep:clap_derive" ]; + "env" = [ "clap_builder/env" ]; + "error-context" = [ "clap_builder/error-context" ]; + "help" = [ "clap_builder/help" ]; + "std" = [ "clap_builder/std" ]; + "string" = [ "clap_builder/string" ]; + "suggestions" = [ "clap_builder/suggestions" ]; + "unicode" = [ "clap_builder/unicode" ]; + "unstable-doc" = [ "clap_builder/unstable-doc" "derive" ]; + "unstable-styles" = [ "clap_builder/unstable-styles" ]; + "unstable-v5" = [ "clap_builder/unstable-v5" "clap_derive?/unstable-v5" "deprecated" ]; + "usage" = [ "clap_builder/usage" ]; + "wrap_help" = [ "clap_builder/wrap_help" ]; + }; + resolvedDefaultFeatures = [ "color" "default" "derive" "error-context" "help" "std" "suggestions" "usage" ]; + }; + "clap_builder" = rec { + crateName = "clap_builder"; + version = "4.4.18"; + edition = "2021"; + sha256 = "1iyif47075caa4x1p3ygk18b07lb4xl4k48w4c061i2hxi0dzx2d"; + dependencies = [ + { + name = "anstream"; + packageId = "anstream"; + optional = true; + } + { + name = "anstyle"; + packageId = "anstyle"; + } + { + name = "clap_lex"; + packageId = "clap_lex"; + } + { + name = "strsim"; + packageId = "strsim"; + optional = true; + } + ]; + features = { + "color" = [ "dep:anstream" ]; + "debug" = [ "dep:backtrace" ]; + "default" = [ "std" "color" "help" "usage" "error-context" "suggestions" ]; + "std" = [ "anstyle/std" ]; + "suggestions" = [ "dep:strsim" "error-context" ]; + "unicode" = [ "dep:unicode-width" "dep:unicase" ]; + "unstable-doc" = [ "cargo" "wrap_help" "env" "unicode" "string" ]; + "unstable-styles" = [ "color" ]; + "unstable-v5" = [ "deprecated" ]; + "wrap_help" = [ "help" "dep:terminal_size" ]; + }; + resolvedDefaultFeatures = [ "color" "error-context" "help" "std" "suggestions" "usage" ]; + }; + "clap_derive" = rec { + crateName = "clap_derive"; + version = "4.4.7"; + edition = "2021"; + sha256 = "0hk4hcxl56qwqsf4hmf7c0gr19r9fbxk0ah2bgkr36pmmaph966g"; + procMacro = true; + dependencies = [ + { + name = "heck"; + packageId = "heck"; + } + { + name = "proc-macro2"; + packageId = "proc-macro2"; + } + { + name = "quote"; + packageId = "quote"; + } + { + name = "syn"; + packageId = "syn 2.0.39"; + features = [ "full" ]; + } + ]; + features = { + "raw-deprecated" = [ "deprecated" ]; + "unstable-v5" = [ "deprecated" ]; + }; + resolvedDefaultFeatures = [ "default" ]; + }; + "clap_lex" = rec { + crateName = "clap_lex"; + version = "0.6.0"; + edition = "2021"; + sha256 = "1l8bragdvim7mva9flvd159dskn2bdkpl0jqrr41wnjfn8pcfbvh"; + + }; + "colorchoice" = rec { + crateName = "colorchoice"; + version = "1.0.0"; + edition = "2021"; + sha256 = "1ix7w85kwvyybwi2jdkl3yva2r2bvdcc3ka2grjfzfgrapqimgxc"; + + }; "console" = rec { crateName = "console"; version = "0.15.7"; @@ -1390,6 +1618,11 @@ rec { name = "bzip2"; packageId = "bzip2"; } + { + name = "clap"; + packageId = "clap"; + features = [ "derive" ]; + } { name = "digest"; packageId = "digest 0.10.7"; @@ -8114,6 +8347,16 @@ rec { "Elliott Mahler " ]; + }; + "strsim" = rec { + crateName = "strsim"; + version = "0.10.0"; + edition = "2015"; + sha256 = "08s69r4rcrahwnickvi0kq49z524ci50capybln83mg6b473qivk"; + authors = [ + "Danny Guo " + ]; + }; "strum_macros" = rec { crateName = "strum_macros"; @@ -8743,6 +8986,18 @@ rec { ]; }; + "utf8parse" = rec { + crateName = "utf8parse"; + version = "0.2.1"; + edition = "2018"; + sha256 = "02ip1a0az0qmc2786vxk2nqwsgcwf17d3a38fkf0q7hrmwh9c6vi"; + authors = [ + "Joe Wilm " + "Christian Duerr " + ]; + features = { }; + resolvedDefaultFeatures = [ "default" ]; + }; "version_check" = rec { crateName = "version_check"; version = "0.9.4"; @@ -10103,6 +10358,253 @@ rec { }; resolvedDefaultFeatures = [ "Win32" "Win32_Foundation" "Win32_NetworkManagement" "Win32_NetworkManagement_IpHelper" "Win32_Networking" "Win32_Networking_WinSock" "Win32_Security" "Win32_Security_Authentication" "Win32_Security_Authentication_Identity" "Win32_Security_Credentials" "Win32_Security_Cryptography" "Win32_Storage" "Win32_Storage_FileSystem" "Win32_System" "Win32_System_Console" "Win32_System_Diagnostics" "Win32_System_Diagnostics_Debug" "Win32_System_IO" "Win32_System_Memory" "Win32_System_Pipes" "Win32_System_SystemServices" "Win32_System_Threading" "Win32_System_WindowsProgramming" "Win32_UI" "Win32_UI_Shell" "default" ]; }; + "windows-sys 0.52.0" = rec { + crateName = "windows-sys"; + version = "0.52.0"; + edition = "2021"; + sha256 = "0gd3v4ji88490zgb6b5mq5zgbvwv7zx1ibn8v3x83rwcdbryaar8"; + authors = [ + "Microsoft" + ]; + dependencies = [ + { + name = "windows-targets"; + packageId = "windows-targets 0.52.0"; + } + ]; + features = { + "Wdk_Foundation" = [ "Wdk" ]; + "Wdk_Graphics" = [ "Wdk" ]; + "Wdk_Graphics_Direct3D" = [ "Wdk_Graphics" ]; + "Wdk_Storage" = [ "Wdk" ]; + "Wdk_Storage_FileSystem" = [ "Wdk_Storage" ]; + "Wdk_Storage_FileSystem_Minifilters" = [ "Wdk_Storage_FileSystem" ]; + "Wdk_System" = [ "Wdk" ]; + "Wdk_System_IO" = [ "Wdk_System" ]; + "Wdk_System_OfflineRegistry" = [ "Wdk_System" ]; + "Wdk_System_Registry" = [ "Wdk_System" ]; + "Wdk_System_SystemInformation" = [ "Wdk_System" ]; + "Wdk_System_SystemServices" = [ "Wdk_System" ]; + "Wdk_System_Threading" = [ "Wdk_System" ]; + "Win32_Data" = [ "Win32" ]; + "Win32_Data_HtmlHelp" = [ "Win32_Data" ]; + "Win32_Data_RightsManagement" = [ "Win32_Data" ]; + "Win32_Devices" = [ "Win32" ]; + "Win32_Devices_AllJoyn" = [ "Win32_Devices" ]; + "Win32_Devices_BiometricFramework" = [ "Win32_Devices" ]; + "Win32_Devices_Bluetooth" = [ "Win32_Devices" ]; + "Win32_Devices_Communication" = [ "Win32_Devices" ]; + "Win32_Devices_DeviceAndDriverInstallation" = [ "Win32_Devices" ]; + "Win32_Devices_DeviceQuery" = [ "Win32_Devices" ]; + "Win32_Devices_Display" = [ "Win32_Devices" ]; + "Win32_Devices_Enumeration" = [ "Win32_Devices" ]; + "Win32_Devices_Enumeration_Pnp" = [ "Win32_Devices_Enumeration" ]; + "Win32_Devices_Fax" = [ "Win32_Devices" ]; + "Win32_Devices_HumanInterfaceDevice" = [ "Win32_Devices" ]; + "Win32_Devices_PortableDevices" = [ "Win32_Devices" ]; + "Win32_Devices_Properties" = [ "Win32_Devices" ]; + "Win32_Devices_Pwm" = [ "Win32_Devices" ]; + "Win32_Devices_Sensors" = [ "Win32_Devices" ]; + "Win32_Devices_SerialCommunication" = [ "Win32_Devices" ]; + "Win32_Devices_Tapi" = [ "Win32_Devices" ]; + "Win32_Devices_Usb" = [ "Win32_Devices" ]; + "Win32_Devices_WebServicesOnDevices" = [ "Win32_Devices" ]; + "Win32_Foundation" = [ "Win32" ]; + "Win32_Gaming" = [ "Win32" ]; + "Win32_Globalization" = [ "Win32" ]; + "Win32_Graphics" = [ "Win32" ]; + "Win32_Graphics_Dwm" = [ "Win32_Graphics" ]; + "Win32_Graphics_Gdi" = [ "Win32_Graphics" ]; + "Win32_Graphics_GdiPlus" = [ "Win32_Graphics" ]; + "Win32_Graphics_Hlsl" = [ "Win32_Graphics" ]; + "Win32_Graphics_OpenGL" = [ "Win32_Graphics" ]; + "Win32_Graphics_Printing" = [ "Win32_Graphics" ]; + "Win32_Graphics_Printing_PrintTicket" = [ "Win32_Graphics_Printing" ]; + "Win32_Management" = [ "Win32" ]; + "Win32_Management_MobileDeviceManagementRegistration" = [ "Win32_Management" ]; + "Win32_Media" = [ "Win32" ]; + "Win32_Media_Audio" = [ "Win32_Media" ]; + "Win32_Media_DxMediaObjects" = [ "Win32_Media" ]; + "Win32_Media_KernelStreaming" = [ "Win32_Media" ]; + "Win32_Media_Multimedia" = [ "Win32_Media" ]; + "Win32_Media_Streaming" = [ "Win32_Media" ]; + "Win32_Media_WindowsMediaFormat" = [ "Win32_Media" ]; + "Win32_NetworkManagement" = [ "Win32" ]; + "Win32_NetworkManagement_Dhcp" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_Dns" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_InternetConnectionWizard" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_IpHelper" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_Multicast" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_Ndis" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_NetBios" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_NetManagement" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_NetShell" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_NetworkDiagnosticsFramework" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_P2P" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_QoS" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_Rras" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_Snmp" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WNet" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WebDav" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WiFi" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WindowsConnectionManager" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WindowsFilteringPlatform" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WindowsFirewall" = [ "Win32_NetworkManagement" ]; + "Win32_NetworkManagement_WindowsNetworkVirtualization" = [ "Win32_NetworkManagement" ]; + "Win32_Networking" = [ "Win32" ]; + "Win32_Networking_ActiveDirectory" = [ "Win32_Networking" ]; + "Win32_Networking_Clustering" = [ "Win32_Networking" ]; + "Win32_Networking_HttpServer" = [ "Win32_Networking" ]; + "Win32_Networking_Ldap" = [ "Win32_Networking" ]; + "Win32_Networking_WebSocket" = [ "Win32_Networking" ]; + "Win32_Networking_WinHttp" = [ "Win32_Networking" ]; + "Win32_Networking_WinInet" = [ "Win32_Networking" ]; + "Win32_Networking_WinSock" = [ "Win32_Networking" ]; + "Win32_Networking_WindowsWebServices" = [ "Win32_Networking" ]; + "Win32_Security" = [ "Win32" ]; + "Win32_Security_AppLocker" = [ "Win32_Security" ]; + "Win32_Security_Authentication" = [ "Win32_Security" ]; + "Win32_Security_Authentication_Identity" = [ "Win32_Security_Authentication" ]; + "Win32_Security_Authorization" = [ "Win32_Security" ]; + "Win32_Security_Credentials" = [ "Win32_Security" ]; + "Win32_Security_Cryptography" = [ "Win32_Security" ]; + "Win32_Security_Cryptography_Catalog" = [ "Win32_Security_Cryptography" ]; + "Win32_Security_Cryptography_Certificates" = [ "Win32_Security_Cryptography" ]; + "Win32_Security_Cryptography_Sip" = [ "Win32_Security_Cryptography" ]; + "Win32_Security_Cryptography_UI" = [ "Win32_Security_Cryptography" ]; + "Win32_Security_DiagnosticDataQuery" = [ "Win32_Security" ]; + "Win32_Security_DirectoryServices" = [ "Win32_Security" ]; + "Win32_Security_EnterpriseData" = [ "Win32_Security" ]; + "Win32_Security_ExtensibleAuthenticationProtocol" = [ "Win32_Security" ]; + "Win32_Security_Isolation" = [ "Win32_Security" ]; + "Win32_Security_LicenseProtection" = [ "Win32_Security" ]; + "Win32_Security_NetworkAccessProtection" = [ "Win32_Security" ]; + "Win32_Security_WinTrust" = [ "Win32_Security" ]; + "Win32_Security_WinWlx" = [ "Win32_Security" ]; + "Win32_Storage" = [ "Win32" ]; + "Win32_Storage_Cabinets" = [ "Win32_Storage" ]; + "Win32_Storage_CloudFilters" = [ "Win32_Storage" ]; + "Win32_Storage_Compression" = [ "Win32_Storage" ]; + "Win32_Storage_DistributedFileSystem" = [ "Win32_Storage" ]; + "Win32_Storage_FileHistory" = [ "Win32_Storage" ]; + "Win32_Storage_FileSystem" = [ "Win32_Storage" ]; + "Win32_Storage_Imapi" = [ "Win32_Storage" ]; + "Win32_Storage_IndexServer" = [ "Win32_Storage" ]; + "Win32_Storage_InstallableFileSystems" = [ "Win32_Storage" ]; + "Win32_Storage_IscsiDisc" = [ "Win32_Storage" ]; + "Win32_Storage_Jet" = [ "Win32_Storage" ]; + "Win32_Storage_Nvme" = [ "Win32_Storage" ]; + "Win32_Storage_OfflineFiles" = [ "Win32_Storage" ]; + "Win32_Storage_OperationRecorder" = [ "Win32_Storage" ]; + "Win32_Storage_Packaging" = [ "Win32_Storage" ]; + "Win32_Storage_Packaging_Appx" = [ "Win32_Storage_Packaging" ]; + "Win32_Storage_ProjectedFileSystem" = [ "Win32_Storage" ]; + "Win32_Storage_StructuredStorage" = [ "Win32_Storage" ]; + "Win32_Storage_Vhd" = [ "Win32_Storage" ]; + "Win32_Storage_Xps" = [ "Win32_Storage" ]; + "Win32_System" = [ "Win32" ]; + "Win32_System_AddressBook" = [ "Win32_System" ]; + "Win32_System_Antimalware" = [ "Win32_System" ]; + "Win32_System_ApplicationInstallationAndServicing" = [ "Win32_System" ]; + "Win32_System_ApplicationVerifier" = [ "Win32_System" ]; + "Win32_System_ClrHosting" = [ "Win32_System" ]; + "Win32_System_Com" = [ "Win32_System" ]; + "Win32_System_Com_Marshal" = [ "Win32_System_Com" ]; + "Win32_System_Com_StructuredStorage" = [ "Win32_System_Com" ]; + "Win32_System_Com_Urlmon" = [ "Win32_System_Com" ]; + "Win32_System_ComponentServices" = [ "Win32_System" ]; + "Win32_System_Console" = [ "Win32_System" ]; + "Win32_System_CorrelationVector" = [ "Win32_System" ]; + "Win32_System_DataExchange" = [ "Win32_System" ]; + "Win32_System_DeploymentServices" = [ "Win32_System" ]; + "Win32_System_DeveloperLicensing" = [ "Win32_System" ]; + "Win32_System_Diagnostics" = [ "Win32_System" ]; + "Win32_System_Diagnostics_Ceip" = [ "Win32_System_Diagnostics" ]; + "Win32_System_Diagnostics_Debug" = [ "Win32_System_Diagnostics" ]; + "Win32_System_Diagnostics_Debug_Extensions" = [ "Win32_System_Diagnostics_Debug" ]; + "Win32_System_Diagnostics_Etw" = [ "Win32_System_Diagnostics" ]; + "Win32_System_Diagnostics_ProcessSnapshotting" = [ "Win32_System_Diagnostics" ]; + "Win32_System_Diagnostics_ToolHelp" = [ "Win32_System_Diagnostics" ]; + "Win32_System_DistributedTransactionCoordinator" = [ "Win32_System" ]; + "Win32_System_Environment" = [ "Win32_System" ]; + "Win32_System_ErrorReporting" = [ "Win32_System" ]; + "Win32_System_EventCollector" = [ "Win32_System" ]; + "Win32_System_EventLog" = [ "Win32_System" ]; + "Win32_System_EventNotificationService" = [ "Win32_System" ]; + "Win32_System_GroupPolicy" = [ "Win32_System" ]; + "Win32_System_HostCompute" = [ "Win32_System" ]; + "Win32_System_HostComputeNetwork" = [ "Win32_System" ]; + "Win32_System_HostComputeSystem" = [ "Win32_System" ]; + "Win32_System_Hypervisor" = [ "Win32_System" ]; + "Win32_System_IO" = [ "Win32_System" ]; + "Win32_System_Iis" = [ "Win32_System" ]; + "Win32_System_Ioctl" = [ "Win32_System" ]; + "Win32_System_JobObjects" = [ "Win32_System" ]; + "Win32_System_Js" = [ "Win32_System" ]; + "Win32_System_Kernel" = [ "Win32_System" ]; + "Win32_System_LibraryLoader" = [ "Win32_System" ]; + "Win32_System_Mailslots" = [ "Win32_System" ]; + "Win32_System_Mapi" = [ "Win32_System" ]; + "Win32_System_Memory" = [ "Win32_System" ]; + "Win32_System_Memory_NonVolatile" = [ "Win32_System_Memory" ]; + "Win32_System_MessageQueuing" = [ "Win32_System" ]; + "Win32_System_MixedReality" = [ "Win32_System" ]; + "Win32_System_Ole" = [ "Win32_System" ]; + "Win32_System_PasswordManagement" = [ "Win32_System" ]; + "Win32_System_Performance" = [ "Win32_System" ]; + "Win32_System_Performance_HardwareCounterProfiling" = [ "Win32_System_Performance" ]; + "Win32_System_Pipes" = [ "Win32_System" ]; + "Win32_System_Power" = [ "Win32_System" ]; + "Win32_System_ProcessStatus" = [ "Win32_System" ]; + "Win32_System_Recovery" = [ "Win32_System" ]; + "Win32_System_Registry" = [ "Win32_System" ]; + "Win32_System_RemoteDesktop" = [ "Win32_System" ]; + "Win32_System_RemoteManagement" = [ "Win32_System" ]; + "Win32_System_RestartManager" = [ "Win32_System" ]; + "Win32_System_Restore" = [ "Win32_System" ]; + "Win32_System_Rpc" = [ "Win32_System" ]; + "Win32_System_Search" = [ "Win32_System" ]; + "Win32_System_Search_Common" = [ "Win32_System_Search" ]; + "Win32_System_SecurityCenter" = [ "Win32_System" ]; + "Win32_System_Services" = [ "Win32_System" ]; + "Win32_System_SetupAndMigration" = [ "Win32_System" ]; + "Win32_System_Shutdown" = [ "Win32_System" ]; + "Win32_System_StationsAndDesktops" = [ "Win32_System" ]; + "Win32_System_SubsystemForLinux" = [ "Win32_System" ]; + "Win32_System_SystemInformation" = [ "Win32_System" ]; + "Win32_System_SystemServices" = [ "Win32_System" ]; + "Win32_System_Threading" = [ "Win32_System" ]; + "Win32_System_Time" = [ "Win32_System" ]; + "Win32_System_TpmBaseServices" = [ "Win32_System" ]; + "Win32_System_UserAccessLogging" = [ "Win32_System" ]; + "Win32_System_Variant" = [ "Win32_System" ]; + "Win32_System_VirtualDosMachines" = [ "Win32_System" ]; + "Win32_System_WindowsProgramming" = [ "Win32_System" ]; + "Win32_System_Wmi" = [ "Win32_System" ]; + "Win32_UI" = [ "Win32" ]; + "Win32_UI_Accessibility" = [ "Win32_UI" ]; + "Win32_UI_ColorSystem" = [ "Win32_UI" ]; + "Win32_UI_Controls" = [ "Win32_UI" ]; + "Win32_UI_Controls_Dialogs" = [ "Win32_UI_Controls" ]; + "Win32_UI_HiDpi" = [ "Win32_UI" ]; + "Win32_UI_Input" = [ "Win32_UI" ]; + "Win32_UI_Input_Ime" = [ "Win32_UI_Input" ]; + "Win32_UI_Input_KeyboardAndMouse" = [ "Win32_UI_Input" ]; + "Win32_UI_Input_Pointer" = [ "Win32_UI_Input" ]; + "Win32_UI_Input_Touch" = [ "Win32_UI_Input" ]; + "Win32_UI_Input_XboxController" = [ "Win32_UI_Input" ]; + "Win32_UI_InteractionContext" = [ "Win32_UI" ]; + "Win32_UI_Magnification" = [ "Win32_UI" ]; + "Win32_UI_Shell" = [ "Win32_UI" ]; + "Win32_UI_Shell_PropertiesSystem" = [ "Win32_UI_Shell" ]; + "Win32_UI_TabletPC" = [ "Win32_UI" ]; + "Win32_UI_TextServices" = [ "Win32_UI" ]; + "Win32_UI_WindowsAndMessaging" = [ "Win32_UI" ]; + "Win32_Web" = [ "Win32" ]; + "Win32_Web_InternetExplorer" = [ "Win32_Web" ]; + }; + resolvedDefaultFeatures = [ "Win32" "Win32_Foundation" "Win32_System" "Win32_System_Console" "default" ]; + }; "windows-targets 0.42.2" = rec { crateName = "windows-targets"; version = "0.42.2"; @@ -10221,6 +10723,53 @@ rec { } ]; + }; + "windows-targets 0.52.0" = rec { + crateName = "windows-targets"; + version = "0.52.0"; + edition = "2021"; + sha256 = "1kg7a27ynzw8zz3krdgy6w5gbqcji27j1sz4p7xk2j5j8082064a"; + authors = [ + "Microsoft" + ]; + dependencies = [ + { + name = "windows_aarch64_gnullvm"; + packageId = "windows_aarch64_gnullvm 0.52.0"; + target = { target, features }: (pkgs.rust.lib.toRustTarget stdenv.hostPlatform == "aarch64-pc-windows-gnullvm"); + } + { + name = "windows_aarch64_msvc"; + packageId = "windows_aarch64_msvc 0.52.0"; + target = { target, features }: (("aarch64" == target."arch" or null) && ("msvc" == target."env" or null) && (!(target."windows_raw_dylib" or false))); + } + { + name = "windows_i686_gnu"; + packageId = "windows_i686_gnu 0.52.0"; + target = { target, features }: (("x86" == target."arch" or null) && ("gnu" == target."env" or null) && (!(target."windows_raw_dylib" or false))); + } + { + name = "windows_i686_msvc"; + packageId = "windows_i686_msvc 0.52.0"; + target = { target, features }: (("x86" == target."arch" or null) && ("msvc" == target."env" or null) && (!(target."windows_raw_dylib" or false))); + } + { + name = "windows_x86_64_gnu"; + packageId = "windows_x86_64_gnu 0.52.0"; + target = { target, features }: (("x86_64" == target."arch" or null) && ("gnu" == target."env" or null) && (!("llvm" == target."abi" or null)) && (!(target."windows_raw_dylib" or false))); + } + { + name = "windows_x86_64_gnullvm"; + packageId = "windows_x86_64_gnullvm 0.52.0"; + target = { target, features }: (pkgs.rust.lib.toRustTarget stdenv.hostPlatform == "x86_64-pc-windows-gnullvm"); + } + { + name = "windows_x86_64_msvc"; + packageId = "windows_x86_64_msvc 0.52.0"; + target = { target, features }: (("x86_64" == target."arch" or null) && ("msvc" == target."env" or null) && (!(target."windows_raw_dylib" or false))); + } + ]; + }; "windows_aarch64_gnullvm 0.42.2" = rec { crateName = "windows_aarch64_gnullvm"; @@ -10241,6 +10790,16 @@ rec { "Microsoft" ]; + }; + "windows_aarch64_gnullvm 0.52.0" = rec { + crateName = "windows_aarch64_gnullvm"; + version = "0.52.0"; + edition = "2021"; + sha256 = "1shmn1kbdc0bpphcxz0vlph96bxz0h1jlmh93s9agf2dbpin8xyb"; + authors = [ + "Microsoft" + ]; + }; "windows_aarch64_msvc 0.42.2" = rec { crateName = "windows_aarch64_msvc"; @@ -10261,6 +10820,16 @@ rec { "Microsoft" ]; + }; + "windows_aarch64_msvc 0.52.0" = rec { + crateName = "windows_aarch64_msvc"; + version = "0.52.0"; + edition = "2021"; + sha256 = "1vvmy1ypvzdvxn9yf0b8ygfl85gl2gpcyvsvqppsmlpisil07amv"; + authors = [ + "Microsoft" + ]; + }; "windows_i686_gnu 0.42.2" = rec { crateName = "windows_i686_gnu"; @@ -10281,6 +10850,16 @@ rec { "Microsoft" ]; + }; + "windows_i686_gnu 0.52.0" = rec { + crateName = "windows_i686_gnu"; + version = "0.52.0"; + edition = "2021"; + sha256 = "04zkglz4p3pjsns5gbz85v4s5aw102raz4spj4b0lmm33z5kg1m2"; + authors = [ + "Microsoft" + ]; + }; "windows_i686_msvc 0.42.2" = rec { crateName = "windows_i686_msvc"; @@ -10301,6 +10880,16 @@ rec { "Microsoft" ]; + }; + "windows_i686_msvc 0.52.0" = rec { + crateName = "windows_i686_msvc"; + version = "0.52.0"; + edition = "2021"; + sha256 = "16kvmbvx0vr0zbgnaz6nsks9ycvfh5xp05bjrhq65kj623iyirgz"; + authors = [ + "Microsoft" + ]; + }; "windows_x86_64_gnu 0.42.2" = rec { crateName = "windows_x86_64_gnu"; @@ -10321,6 +10910,16 @@ rec { "Microsoft" ]; + }; + "windows_x86_64_gnu 0.52.0" = rec { + crateName = "windows_x86_64_gnu"; + version = "0.52.0"; + edition = "2021"; + sha256 = "1zdy4qn178sil5sdm63lm7f0kkcjg6gvdwmcprd2yjmwn8ns6vrx"; + authors = [ + "Microsoft" + ]; + }; "windows_x86_64_gnullvm 0.42.2" = rec { crateName = "windows_x86_64_gnullvm"; @@ -10341,6 +10940,16 @@ rec { "Microsoft" ]; + }; + "windows_x86_64_gnullvm 0.52.0" = rec { + crateName = "windows_x86_64_gnullvm"; + version = "0.52.0"; + edition = "2021"; + sha256 = "17lllq4l2k1lqgcnw1cccphxp9vs7inq99kjlm2lfl9zklg7wr8s"; + authors = [ + "Microsoft" + ]; + }; "windows_x86_64_msvc 0.42.2" = rec { crateName = "windows_x86_64_msvc"; @@ -10361,6 +10970,16 @@ rec { "Microsoft" ]; + }; + "windows_x86_64_msvc 0.52.0" = rec { + crateName = "windows_x86_64_msvc"; + version = "0.52.0"; + edition = "2021"; + sha256 = "012wfq37f18c09ij5m6rniw7xxn5fcvrxbqd0wd8vgnl3hfn9yfz"; + authors = [ + "Microsoft" + ]; + }; "xml-rs" = rec { crateName = "xml-rs"; @@ -10698,6 +11317,16 @@ rec { # recreate a file hierarchy as when running tests with cargo # the source for test data + # It's necessary to locate the source in $NIX_BUILD_TOP/source/ + # instead of $NIX_BUILD_TOP/ + # because we compiled those test binaries in the former and not the latter. + # So all paths will expect source tree to be there and not in the build top directly. + # For example: $NIX_BUILD_TOP := /build in general, if you ask yourself. + # TODO(raitobezarius): I believe there could be more edge cases if `crate.sourceRoot` + # do exist but it's very hard to reason about them, so let's wait until the first bug report. + mkdir -p source/ + cd source/ + ${pkgs.buildPackages.xorg.lndir}/bin/lndir ${crate.src} # build outputs diff --git a/tvix/tools/crunch-v2/Cargo.toml b/tvix/tools/crunch-v2/Cargo.toml index eb44805d7c2c..1e3f0252504d 100644 --- a/tvix/tools/crunch-v2/Cargo.toml +++ b/tvix/tools/crunch-v2/Cargo.toml @@ -33,6 +33,7 @@ zstd = "0.13.0" prost = "0.12.2" polars = { version = "0.35.4", default-features = false, features = ["parquet", "lazy", "sql", "dtype-struct"] } indicatif = "0.17.7" +clap = { version = "4.4.18", features = ["derive"] } [build-dependencies] prost-build = "0.12.2" diff --git a/tvix/tools/crunch-v2/src/bin/extract.rs b/tvix/tools/crunch-v2/src/bin/extract.rs index 8da8df707a0e..416d201f4e04 100644 --- a/tvix/tools/crunch-v2/src/bin/extract.rs +++ b/tvix/tools/crunch-v2/src/bin/extract.rs @@ -5,13 +5,12 @@ //! They are concatenated without any additional structure, so nothing but the chunk list is preserved. use anyhow::Result; +use clap::Parser; use indicatif::{ProgressBar, ProgressStyle}; use std::fs::File; +use std::path::PathBuf; -use crunch_v2::{ - proto::{self, path::Node}, - FILES, -}; +use crunch_v2::proto::{self, path::Node}; use prost::Message; use polars::{ @@ -23,15 +22,32 @@ use polars::{ series::IntoSeries, }; +#[derive(Parser)] +struct Args { + /// Path to the sled database that's read from. + #[clap(default_value = "crunch.db")] + infile: PathBuf, + + /// Path to the resulting parquet file that's written. + #[clap(default_value = "crunch.parquet")] + outfile: PathBuf, +} + fn main() -> Result<()> { - let w = ParquetWriter::new(File::create("crunch.parquet")?); + let args = Args::parse(); + + let w = ParquetWriter::new(File::create(args.outfile)?); + + let db: sled::Db = sled::open(&args.infile).unwrap(); + let files_tree: sled::Tree = db.open_tree("files").unwrap(); - let progress = ProgressBar::new(FILES.len() as u64).with_style(ProgressStyle::with_template( - "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", - )?); + let progress = + ProgressBar::new(files_tree.len() as u64).with_style(ProgressStyle::with_template( + "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", + )?); let mut frame = FrameBuilder::new(); - for entry in &*FILES { + for entry in &files_tree { let (file_hash, pb) = entry?; frame.push( file_hash[..].try_into().unwrap(), diff --git a/tvix/tools/crunch-v2/src/lib.rs b/tvix/tools/crunch-v2/src/lib.rs index 0f84e84f1772..09ea2e75d5a3 100644 --- a/tvix/tools/crunch-v2/src/lib.rs +++ b/tvix/tools/crunch-v2/src/lib.rs @@ -1,10 +1,3 @@ -use lazy_static::lazy_static; - pub mod proto { include!(concat!(env!("OUT_DIR"), "/tvix.flatstore.v1.rs")); } - -lazy_static! { - static ref DB: sled::Db = sled::open("crunch.db").unwrap(); - pub static ref FILES: sled::Tree = DB.open_tree("files").unwrap(); -} diff --git a/tvix/tools/crunch-v2/src/main.rs b/tvix/tools/crunch-v2/src/main.rs index 2aa2939fa1b4..a5d538f6beac 100644 --- a/tvix/tools/crunch-v2/src/main.rs +++ b/tvix/tools/crunch-v2/src/main.rs @@ -9,16 +9,17 @@ //! //! flatstore protobufs are written to a sled database named `crunch.db`, addressed by file hash. -use crunch_v2::{proto, FILES}; +use crunch_v2::proto; mod remote; use anyhow::Result; +use clap::Parser; use futures::{stream, StreamExt, TryStreamExt}; use indicatif::{ProgressBar, ProgressStyle}; use std::{ - env, io::{self, BufRead, Read, Write}, + path::PathBuf, ptr, }; @@ -34,13 +35,39 @@ use digest::Digest; use prost::Message; use sha2::Sha256; +#[derive(Parser)] +struct Args { + /// Path to an existing parquet file. + /// The `file_hash` column should contain SHA-256 hashes of the compressed + /// data, corresponding to the `FileHash` narinfo field. + /// The `compression` column should contain either `"bzip2"` or `"xz"`, + /// corresponding to the `Compression` narinfo field. + /// Additional columns are ignored, but can be used by the SQL filter expression. + #[clap(long, default_value = "ingest.parquet")] + infile: PathBuf, + + /// Filter expression to filter elements in the parquet file for. + filter: String, + + /// Average chunk size for FastCDC, in KiB. + /// min value is half, max value double of that number. + #[clap(long, default_value_t = 256)] + avg_chunk_size: u32, + + /// Path to the sled database where results are written to (flatstore + /// protobufs, addressed by file hash). + #[clap(long, default_value = "crunch.db")] + outfile: PathBuf, +} + #[tokio::main] async fn main() -> Result<()> { - let mut args = env::args(); - args.next().unwrap(); + let args = Args::parse(); - let filter = sql_expr(args.next().unwrap())?; - let df = LazyFrame::scan_parquet("ingest.parquet", ScanArgsParquet::default())? + let filter = sql_expr(args.filter)?; + let avg_chunk_size = args.avg_chunk_size * 1024; + + let df = LazyFrame::scan_parquet(&args.infile, ScanArgsParquet::default())? .filter(filter) .select([col("file_hash"), col("compression")]) .drop_nulls(None) @@ -62,12 +89,16 @@ async fn main() -> Result<()> { .into_iter() .map(|c| c.unwrap()); + let db: sled::Db = sled::open(args.outfile).unwrap(); + let files_tree = db.open_tree("files").unwrap(); + let res = stream::iter(file_hash.zip(compression)) .map(Ok) .try_for_each_concurrent(Some(16), |(file_hash, compression)| { let progress = progress.clone(); + let files_tree = files_tree.clone(); async move { - if FILES.contains_key(&file_hash)? { + if files_tree.contains_key(&file_hash)? { progress.inc(1); return Ok(()); } @@ -77,12 +108,15 @@ async fn main() -> Result<()> { tokio::task::spawn_blocking(move || { let mut reader = Sha256Reader::from(reader); - let path = ingest(nar::open(&mut reader)?, vec![]).map(|node| proto::Path { - nar_hash: reader.finalize().as_slice().into(), - node: Some(node), - })?; + let path = + ingest(nar::open(&mut reader)?, vec![], avg_chunk_size).map(|node| { + proto::Path { + nar_hash: reader.finalize().as_slice().into(), + node: Some(node), + } + })?; - FILES.insert(file_hash, path.encode_to_vec())?; + files_tree.insert(file_hash, path.encode_to_vec())?; progress.inc(1); Ok::<_, anyhow::Error>(()) @@ -92,7 +126,7 @@ async fn main() -> Result<()> { }) .await; - let flush = crunch_v2::FILES.flush_async().await; + let flush = files_tree.flush_async().await; res?; flush?; @@ -100,7 +134,7 @@ async fn main() -> Result<()> { Ok(()) } -fn ingest(node: nar::Node, name: Vec) -> Result { +fn ingest(node: nar::Node, name: Vec, avg_chunk_size: u32) -> Result { match node { nar::Node::Symlink { target } => Ok(proto::path::Node::Symlink(proto::SymlinkNode { name, @@ -113,7 +147,7 @@ fn ingest(node: nar::Node, name: Vec) -> Result { let mut symlinks = vec![]; while let Some(node) = reader.next()? { - match ingest(node.node, node.name)? { + match ingest(node.node, node.name, avg_chunk_size)? { proto::path::Node::Directory(node) => { directories.push(node); } @@ -138,7 +172,12 @@ fn ingest(node: nar::Node, name: Vec) -> Result { let mut reader = B3Reader::from(reader); let mut chunks = vec![]; - for chunk in StreamCDC::new(&mut reader, 1 << 17, 1 << 18, 1 << 19) { + for chunk in StreamCDC::new( + &mut reader, + avg_chunk_size / 2, + avg_chunk_size, + avg_chunk_size * 2, + ) { let ChunkData { length: size, data, .. } = chunk?; -- cgit 1.4.1