diff options
author | edef <edef@edef.eu> | 2024-10-17T13·26+0000 |
---|---|---|
committer | edef <edef@edef.eu> | 2024-10-19T17·01+0000 |
commit | 313899c291f0295506c275418e570b39b4a5f079 (patch) | |
tree | 0a4c737f7c54082eaf3957072693115e29fabcd1 /users/edef/weave/src/lib.rs | |
parent | bdc2891053c28a5739c4be55f4816d362a5f08e2 (diff) |
refactor(users/edef/weave/swizzle): use polars streaming r/8844
This vastly reduces the memory requirements, so we can run in ~40G RAM.

Change-Id: I4952a780df294bd852a8b4682ba2fd59b9bae675
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12667
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef/weave/src/lib.rs')
-rw-r--r-- | users/edef/weave/src/lib.rs | 20 |
1 file changed, 18 insertions, 2 deletions
```diff
diff --git a/users/edef/weave/src/lib.rs b/users/edef/weave/src/lib.rs
index db3d07e7de07..4ccd566ca52d 100644
--- a/users/edef/weave/src/lib.rs
+++ b/users/edef/weave/src/lib.rs
@@ -1,7 +1,13 @@
 use anyhow::Result;
-use owning_ref::ArcRef;
+use owning_ref::{ArcRef, OwningRef};
 use rayon::prelude::*;
-use std::{fs::File, ops::Range, slice};
+use std::{
+    fs::File,
+    mem,
+    ops::{Deref, Range},
+    slice,
+    sync::Arc,
+};
 
 use polars::{
     datatypes::BinaryChunked,
@@ -24,6 +30,16 @@ pub fn hash64(h: &[u8; 20]) -> u64 {
     u64::from_ne_bytes(buf)
 }
 
+pub fn leak<O, T: ?Sized>(r: OwningRef<Arc<O>, T>) -> &T {
+    // SAFETY: Either `ptr` points into the `Arc`, which lives until `r` is dropped,
+    // or it points at something else entirely which lives at least as long.
+    unsafe {
+        let ptr: *const T = r.deref();
+        mem::forget(r);
+        &*ptr
+    }
+}
+
 /// Read a dense `store_path_hash` array from `narinfo.parquet`,
 /// returning it as an owned [FixedBytes].
 pub fn load_ph_array() -> Result<FixedBytes<20>> {
```