about summary refs log tree commit diff
path: root/users/edef/weave/src/lib.rs
diff options
context:
space:
mode:
author: edef <edef@edef.eu>, 2024-10-17T13·26+0000
committer: edef <edef@edef.eu>, 2024-10-19T17·01+0000
commit: 313899c291f0295506c275418e570b39b4a5f079 (patch)
tree: 0a4c737f7c54082eaf3957072693115e29fabcd1 /users/edef/weave/src/lib.rs
parent: bdc2891053c28a5739c4be55f4816d362a5f08e2 (diff)
refactor(users/edef/weave/swizzle): use polars streaming r/8844
This vastly reduces the memory requirements, so we can run in ~40G RAM.

Change-Id: I4952a780df294bd852a8b4682ba2fd59b9bae675
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12667
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef/weave/src/lib.rs')
-rw-r--r-- users/edef/weave/src/lib.rs 20
1 files changed, 18 insertions, 2 deletions
diff --git a/users/edef/weave/src/lib.rs b/users/edef/weave/src/lib.rs
index db3d07e7de07..4ccd566ca52d 100644
--- a/users/edef/weave/src/lib.rs
+++ b/users/edef/weave/src/lib.rs
@@ -1,7 +1,13 @@
 use anyhow::Result;
-use owning_ref::ArcRef;
+use owning_ref::{ArcRef, OwningRef};
 use rayon::prelude::*;
-use std::{fs::File, ops::Range, slice};
+use std::{
+    fs::File,
+    mem,
+    ops::{Deref, Range},
+    slice,
+    sync::Arc,
+};
 
 use polars::{
     datatypes::BinaryChunked,
@@ -24,6 +30,16 @@ pub fn hash64(h: &[u8; 20]) -> u64 {
     u64::from_ne_bytes(buf)
 }
 
+pub fn leak<O, T: ?Sized>(r: OwningRef<Arc<O>, T>) -> &T {
+    // SAFETY: Either `ptr` points into the `Arc`, which lives until `r` is dropped,
+    // or it points at something else entirely which lives at least as long.
+    unsafe {
+        let ptr: *const T = r.deref();
+        mem::forget(r);
+        &*ptr
+    }
+}
+
 /// Read a dense `store_path_hash` array from `narinfo.parquet`,
 /// returning it as an owned [FixedBytes].
 pub fn load_ph_array() -> Result<FixedBytes<20>> {