about summary refs log tree commit diff
path: root/tvix/tools/weave/src
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/tools/weave/src')
-rw-r--r--tvix/tools/weave/src/lib.rs20
1 files changed, 12 insertions, 8 deletions
diff --git a/tvix/tools/weave/src/lib.rs b/tvix/tools/weave/src/lib.rs
index 12a86f9fb002..bc2221bf5c9b 100644
--- a/tvix/tools/weave/src/lib.rs
+++ b/tvix/tools/weave/src/lib.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use rayon::prelude::*;
-use std::{fs::File, slice};
+use std::{fs::File, ops::Range, slice};
 
 use polars::{
     datatypes::BinaryChunked,
@@ -49,8 +49,8 @@ pub fn as_fixed_binary<const N: usize>(
     chunked: &BinaryChunked,
 ) -> impl Iterator<Item = &[[u8; N]]> + DoubleEndedIterator {
     chunked.downcast_iter().map(|array| {
-        assert_fixed_dense::<N>(array);
-        exact_chunks(array.values()).unwrap()
+        let range = assert_fixed_dense::<N>(array);
+        exact_chunks(&array.values()[range]).unwrap()
     })
 }
 
@@ -61,20 +61,22 @@ fn into_fixed_binary_rechunk<const N: usize>(chunked: &BinaryChunked) -> FixedBy
     let mut iter = chunked.downcast_iter();
     let array = iter.next().unwrap();
 
-    assert_fixed_dense::<N>(array);
-    Bytes(array.values().clone()).map(|buf| exact_chunks(buf).unwrap())
+    let range = assert_fixed_dense::<N>(array);
+    Bytes(array.values().clone().sliced(range.start, range.len()))
+        .map(|buf| exact_chunks(buf).unwrap())
 }
 
 /// Ensures that the supplied Arrow array consists of densely packed bytestrings of length `N`.
 /// In other words, ensure that it is free of nulls, and that the offsets have a fixed stride of `N`.
-fn assert_fixed_dense<const N: usize>(array: &BinaryArray<i64>) {
+#[must_use = "only the range returned is guaranteed to be conformant"]
+fn assert_fixed_dense<const N: usize>(array: &BinaryArray<i64>) -> Range<usize> {
     let null_count = array.validity().map_or(0, |bits| bits.unset_bits());
     if null_count > 0 {
         panic!("null values present");
     }
 
-    let length_check = array
-        .offsets()
+    let offsets = array.offsets();
+    let length_check = offsets
         .as_slice()
         .par_windows(2)
         .all(|w| (w[1] - w[0]) == N as i64);
@@ -82,6 +84,8 @@ fn assert_fixed_dense<const N: usize>(array: &BinaryArray<i64>) {
     if !length_check {
         panic!("lengths are inconsistent");
     }
+
+    (*offsets.first() as usize)..(*offsets.last() as usize)
 }
 
 fn exact_chunks<const K: usize>(buf: &[u8]) -> Option<&[[u8; K]]> {