diff options
Diffstat (limited to 'tvix/tools/crunch-v2/src/bin/extract.rs')
-rw-r--r-- | tvix/tools/crunch-v2/src/bin/extract.rs | 34 |
1 files changed, 25 insertions, 9 deletions
diff --git a/tvix/tools/crunch-v2/src/bin/extract.rs b/tvix/tools/crunch-v2/src/bin/extract.rs index 8da8df707a0e..416d201f4e04 100644 --- a/tvix/tools/crunch-v2/src/bin/extract.rs +++ b/tvix/tools/crunch-v2/src/bin/extract.rs @@ -5,13 +5,12 @@ //! They are concatenated without any additional structure, so nothing but the chunk list is preserved. use anyhow::Result; +use clap::Parser; use indicatif::{ProgressBar, ProgressStyle}; use std::fs::File; +use std::path::PathBuf; -use crunch_v2::{ - proto::{self, path::Node}, - FILES, -}; +use crunch_v2::proto::{self, path::Node}; use prost::Message; use polars::{ @@ -23,15 +22,32 @@ use polars::{ series::IntoSeries, }; +#[derive(Parser)] +struct Args { + /// Path to the sled database that's read from. + #[clap(default_value = "crunch.db")] + infile: PathBuf, + + /// Path to the resulting parquet file that's written. + #[clap(default_value = "crunch.parquet")] + outfile: PathBuf, +} + fn main() -> Result<()> { - let w = ParquetWriter::new(File::create("crunch.parquet")?); + let args = Args::parse(); + + let w = ParquetWriter::new(File::create(args.outfile)?); + + let db: sled::Db = sled::open(&args.infile).unwrap(); + let files_tree: sled::Tree = db.open_tree("files").unwrap(); - let progress = ProgressBar::new(FILES.len() as u64).with_style(ProgressStyle::with_template( - "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", - )?); + let progress = + ProgressBar::new(files_tree.len() as u64).with_style(ProgressStyle::with_template( + "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}", + )?); let mut frame = FrameBuilder::new(); - for entry in &*FILES { + for entry in &files_tree { let (file_hash, pb) = entry?; frame.push( file_hash[..].try_into().unwrap(), |