diff options
author | Florian Klink <flokli@flokli.de> | 2024-01-25T12·47+0200 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-01-27T18·40+0000 |
commit | b38be028d96ce107439f3323026270228a871a13 (patch) | |
tree | 991bc6341714a3896645240309d88dc20784f90e /tvix/tools/crunch-v2/src/bin/extract.rs | |
parent | 4f22203a3aecd070881ae9b4eabc47532d948f01 (diff) |
feat(tvix/tools/crunch-v2): add CLI args r/7453
Use clap derive to make the input and output files configurable, as well as the chunk size parameters.

Change-Id: I02b29126f3bd2c13ba2c6e7e0aa4ff048ff803ed
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10691
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'tvix/tools/crunch-v2/src/bin/extract.rs')
-rw-r--r-- | tvix/tools/crunch-v2/src/bin/extract.rs | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/tvix/tools/crunch-v2/src/bin/extract.rs b/tvix/tools/crunch-v2/src/bin/extract.rs index 8da8df707a0e..416d201f4e04 100644 --- a/tvix/tools/crunch-v2/src/bin/extract.rs +++ b/tvix/tools/crunch-v2/src/bin/extract.rs @@ -5,13 +5,12 @@ //! They are concatenated without any additional structure, so nothing but the chunk list is preserved. use anyhow::Result; +use clap::Parser; use indicatif::{ProgressBar, ProgressStyle}; use std::fs::File; +use std::path::PathBuf; -use crunch_v2::{ - proto::{self, path::Node}, - FILES, -}; +use crunch_v2::proto::{self, path::Node}; use prost::Message; use polars::{ @@ -23,15 +22,32 @@ use polars::{ series::IntoSeries, }; +#[derive(Parser)] +struct Args { + /// Path to the sled database that's read from. + #[clap(default_value = "crunch.db")] + infile: PathBuf, + + /// Path to the resulting parquet file that's written. + #[clap(default_value = "crunch.parquet")] + outfile: PathBuf, +} + fn main() -> Result<()> { - let w = ParquetWriter::new(File::create("crunch.parquet")?); + let args = Args::parse(); + + let w = ParquetWriter::new(File::create(args.outfile)?); + + let db: sled::Db = sled::open(&args.infile).unwrap(); + let files_tree: sled::Tree = db.open_tree("files").unwrap(); - let progress = ProgressBar::new(FILES.len() as u64).with_style(ProgressStyle::with_template( - "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", - )?); + let progress = + ProgressBar::new(files_tree.len() as u64).with_style(ProgressStyle::with_template( + "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", + )?); let mut frame = FrameBuilder::new(); - for entry in &*FILES { + for entry in &files_tree { let (file_hash, pb) = entry?; frame.push( file_hash[..].try_into().unwrap(), |