From bdc2891053c28a5739c4be55f4816d362a5f08e2 Mon Sep 17 00:00:00 2001 From: edef Date: Sat, 19 Oct 2024 12:59:07 +0000 Subject: refactor(users/edef/weave): simplify OwningRef handling We switch to working with the underlying Arc<Bytes<u8>> type, since Buffer is already essentially an OwningRef-esque subslice. Because we're now working with an exposed Arc directly, we don't need to have our own `unsafe impl StableAddress` any more. Change-Id: I9ce2edc6899177145e15b72aa5380f708a62173c Reviewed-on: https://cl.tvl.fyi/c/depot/+/12668 Reviewed-by: flokli Tested-by: BuildkiteCI --- users/edef/weave/src/bytes.rs | 27 --------------------------- users/edef/weave/src/lib.rs | 17 ++++++++++++----- 2 files changed, 12 insertions(+), 32 deletions(-) delete mode 100644 users/edef/weave/src/bytes.rs diff --git a/users/edef/weave/src/bytes.rs b/users/edef/weave/src/bytes.rs deleted file mode 100644 index 689b8fdfc0c8..000000000000 --- a/users/edef/weave/src/bytes.rs +++ /dev/null @@ -1,27 +0,0 @@ -use owning_ref::{OwningRef, StableAddress}; -use polars::export::arrow::buffer::Buffer; -use std::ops::Deref; - -/// An shared `[[u8; N]]` backed by a Polars [Buffer]. -pub type FixedBytes<const N: usize> = OwningRef<'static, Bytes, [[u8; N]]>; - -/// Wrapper struct to make [Buffer] implement [StableAddress]. -/// TODO(edef): upstream the `impl` -pub struct Bytes(pub Buffer<u8>); - -/// SAFETY: [Buffer] is always an Arc+Vec indirection. 
-unsafe impl StableAddress for Bytes {} - -impl Bytes { - pub fn map<U: ?Sized>(self, f: impl FnOnce(&[u8]) -> &U) -> OwningRef<'static, Self, U> { - OwningRef::new(self).map(f) - } -} - -impl Deref for Bytes { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &*self.0 - } -} diff --git a/users/edef/weave/src/lib.rs b/users/edef/weave/src/lib.rs index 8d365a4c40f1..db3d07e7de07 100644 --- a/users/edef/weave/src/lib.rs +++ b/users/edef/weave/src/lib.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use owning_ref::ArcRef; use rayon::prelude::*; use std::{fs::File, ops::Range, slice}; @@ -8,8 +9,9 @@ use polars::{ prelude::{ParquetReader, SerReader}, }; -pub use crate::bytes::*; -mod bytes; +/// An shared `[[u8; N]]` backed by a Polars [Buffer]. +pub type FixedBytes<const N: usize> = + ArcRef<'static, polars::export::arrow::buffer::Bytes<u8>, [[u8; N]]>; pub const INDEX_NULL: u32 = !0; pub const DONE: &str = "\u{2714}"; @@ -60,10 +62,15 @@ fn into_fixed_binary_rechunk<const N: usize>(chunked: &BinaryChunked) -> FixedBy let chunked = chunked.rechunk(); let mut iter = chunked.downcast_iter(); let array = iter.next().unwrap(); + assert!(iter.next().is_none()); - let range = assert_fixed_dense::<N>(array); - Bytes(array.values().clone().sliced(range.start, range.len())) - .map(|buf| exact_chunks(buf).unwrap()) + let (buf, off, len) = { + let range = assert_fixed_dense::<N>(array); + array.values().clone().sliced(range.start, range.len()) + } + .into_inner(); + + ArcRef::new(buf).map(|bytes| exact_chunks(&bytes[off..off + len]).unwrap()) } /// Ensures that the supplied Arrow array consists of densely packed bytestrings of length `N`. -- cgit 1.4.1