about summary refs log tree commit diff
path: root/tvix/tools/crunch-v2/src/bin/extract.rs
diff options
context:
space:
mode:
author Florian Klink <flokli@flokli.de> 2024-01-25T12·47+0200
committer clbot <clbot@tvl.fyi> 2024-01-27T18·40+0000
commit b38be028d96ce107439f3323026270228a871a13 (patch)
tree 991bc6341714a3896645240309d88dc20784f90e /tvix/tools/crunch-v2/src/bin/extract.rs
parent 4f22203a3aecd070881ae9b4eabc47532d948f01 (diff)
feat(tvix/tools/crunch-v2): add CLI args r/7453
Use clap derive to make the input and output files configurable, as well
as the chunk size parameters.

Change-Id: I02b29126f3bd2c13ba2c6e7e0aa4ff048ff803ed
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10691
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'tvix/tools/crunch-v2/src/bin/extract.rs')
-rw-r--r-- tvix/tools/crunch-v2/src/bin/extract.rs 34
1 files changed, 25 insertions, 9 deletions
diff --git a/tvix/tools/crunch-v2/src/bin/extract.rs b/tvix/tools/crunch-v2/src/bin/extract.rs
index 8da8df707a0e..416d201f4e04 100644
--- a/tvix/tools/crunch-v2/src/bin/extract.rs
+++ b/tvix/tools/crunch-v2/src/bin/extract.rs
@@ -5,13 +5,12 @@
 //! They are concatenated without any additional structure, so nothing but the chunk list is preserved.
 
 use anyhow::Result;
+use clap::Parser;
 use indicatif::{ProgressBar, ProgressStyle};
 use std::fs::File;
+use std::path::PathBuf;
 
-use crunch_v2::{
-    proto::{self, path::Node},
-    FILES,
-};
+use crunch_v2::proto::{self, path::Node};
 use prost::Message;
 
 use polars::{
@@ -23,15 +22,32 @@ use polars::{
     series::IntoSeries,
 };
 
+#[derive(Parser)]
+struct Args {
+    /// Path to the sled database that's read from.
+    #[clap(default_value = "crunch.db")]
+    infile: PathBuf,
+
+    /// Path to the resulting parquet file that's written.
+    #[clap(default_value = "crunch.parquet")]
+    outfile: PathBuf,
+}
+
 fn main() -> Result<()> {
-    let w = ParquetWriter::new(File::create("crunch.parquet")?);
+    let args = Args::parse();
+
+    let w = ParquetWriter::new(File::create(args.outfile)?);
+
+    let db: sled::Db = sled::open(&args.infile).unwrap();
+    let files_tree: sled::Tree = db.open_tree("files").unwrap();
 
-    let progress = ProgressBar::new(FILES.len() as u64).with_style(ProgressStyle::with_template(
-        "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}",
-    )?);
+    let progress =
+        ProgressBar::new(files_tree.len() as u64).with_style(ProgressStyle::with_template(
+            "{elapsed_precise}/{duration_precise} {wide_bar} {pos}/{len}",
+        )?);
 
     let mut frame = FrameBuilder::new();
-    for entry in &*FILES {
+    for entry in &files_tree {
         let (file_hash, pb) = entry?;
         frame.push(
             file_hash[..].try_into().unwrap(),