about summary refs log tree commit diff
path: root/users/edef/crunch-v2/src/remote.rs
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2024-10-12T22·33+0300
committer clbot <clbot@tvl.fyi> 2024-10-12T23·02+0000
commit 398a9b531758297417280ccf06b82c85f3d236dd (patch)
tree 1cc1461a07db173c2c2594f447d6d584976dcac1 /users/edef/crunch-v2/src/remote.rs
parent b21cb11b7f65d84baca62a0492d63d184e4f5095 (diff)
chore(tvix/tools): move crunch-v2 to //users/edef r/8798
This is not a core Tvix tool, it's some sort of one-off analysis thing.

Change-Id: I05fcbed45abad27d6b5cfd49db1727249dad3971
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12603
Autosubmit: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'users/edef/crunch-v2/src/remote.rs')
-rw-r--r-- users/edef/crunch-v2/src/remote.rs 211
1 files changed, 211 insertions, 0 deletions
diff --git a/users/edef/crunch-v2/src/remote.rs b/users/edef/crunch-v2/src/remote.rs
new file mode 100644
index 000000000000..93952ecd737f
--- /dev/null
+++ b/users/edef/crunch-v2/src/remote.rs
@@ -0,0 +1,211 @@
+use std::{
+    cmp,
+    io::{self, BufRead, BufReader, Read},
+    pin::Pin,
+    task::{self, Poll},
+};
+
+use anyhow::{bail, Result};
+use bytes::{Buf, Bytes};
+use futures::{future::BoxFuture, Future, FutureExt, Stream, StreamExt};
+use lazy_static::lazy_static;
+use tokio::runtime::Handle;
+
+use nix_compat::nixbase32;
+
+use rusoto_core::{ByteStream, Region};
+use rusoto_s3::{GetObjectOutput, GetObjectRequest, S3Client, S3};
+
+use bzip2::read::BzDecoder;
+use xz2::read::XzDecoder;
+
+lazy_static! {
+    /// Shared S3 client for the `us-east-1` region, constructed lazily on first use.
+    static ref S3_CLIENT: S3Client = S3Client::new(Region::UsEast1);
+}
+
+/// Name of the S3 bucket that every object request in this file is issued against.
+const BUCKET: &str = "nix-cache";
+
+/// Opens the compressed NAR identified by `file_hash` and returns a reader that
+/// yields its decompressed bytes.
+///
+/// `compression` selects both the object-name suffix and the decoder:
+/// `"xz"` maps to `.xz`, `"bzip2"` maps to `.bz2`.
+///
+/// # Errors
+///
+/// Fails if `compression` is any other value (checked before any request is
+/// made), or if the initial object request fails.
+///
+/// Only the initial request is awaited here. The returned reader fetches the
+/// remaining data by blocking on the current Tokio runtime handle as it is read.
+pub async fn nar(
+    file_hash: [u8; 32],
+    compression: &str,
+) -> Result<Box<BufReader<dyn Read + Send>>> {
+    let (extension, decompress): (&'static str, fn(_) -> Box<_>) = match compression {
+        "xz" => ("xz", decompress_xz),
+        "bzip2" => ("bz2", decompress_bz2),
+        _ => bail!("unknown compression: {compression}"),
+    };
+
+    let key = FileKey {
+        file_hash,
+        extension,
+    };
+    let stream = FileStream::new(key).await?;
+
+    Ok(decompress(FileStreamReader::from(stream)))
+}
+
+/// Wraps `reader` in an xz decoder and buffers the decoded output.
+fn decompress_xz(reader: FileStreamReader) -> Box<BufReader<dyn Read + Send>> {
+    let decoder = XzDecoder::new(reader);
+    Box::new(BufReader::new(decoder))
+}
+
+/// Wraps `reader` in a bzip2 decoder and buffers the decoded output.
+fn decompress_bz2(reader: FileStreamReader) -> Box<BufReader<dyn Read + Send>> {
+    let decoder = BzDecoder::new(reader);
+    Box::new(BufReader::new(decoder))
+}
+
+/// Blocking adapter that exposes a [`FileStream`] through [`Read`] and [`BufRead`].
+struct FileStreamReader {
+    /// Asynchronous source of chunks.
+    inner: FileStream,
+    /// Unconsumed remainder of the most recently fetched chunk.
+    buffer: Bytes,
+}
+
+impl From<FileStream> for FileStreamReader {
+    /// Wraps `stream`, starting out with an empty read buffer.
+    fn from(stream: FileStream) -> Self {
+        Self {
+            buffer: Bytes::new(),
+            inner: stream,
+        }
+    }
+}
+
+impl Read for FileStreamReader {
+    /// Copies as many buffered bytes as fit into `dst`, refilling the buffer
+    /// first if it is empty. Returns `Ok(0)` when no more data is available
+    /// or `dst` is empty.
+    fn read(&mut self, dst: &mut [u8]) -> io::Result<usize> {
+        let available = self.fill_buf()?;
+        let count = available.len().min(dst.len());
+        dst[..count].copy_from_slice(&available[..count]);
+        self.consume(count);
+        Ok(count)
+    }
+}
+
+impl BufRead for FileStreamReader {
+    /// Returns the unconsumed bytes of the current chunk, fetching the next
+    /// chunk from the underlying stream when the buffer is empty.
+    ///
+    /// An empty slice is returned only once the stream has ended: `BufRead`
+    /// consumers treat an empty buffer as EOF, so zero-length chunks are
+    /// skipped rather than surfaced.
+    ///
+    /// The fetch blocks the calling thread via `Handle::current().block_on`,
+    /// so this must be called where a Tokio runtime handle is available and
+    /// blocking is permitted (see the Tokio `Handle` documentation for the
+    /// panic conditions).
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error yielded by the underlying stream.
+    fn fill_buf(&mut self) -> io::Result<&[u8]> {
+        while self.buffer.is_empty() {
+            let next = Handle::current()
+                .block_on(self.inner.next())
+                .transpose()?;
+
+            match next {
+                Some(chunk) => self.buffer = chunk,
+                // End of stream: leave the buffer empty to signal EOF.
+                None => break,
+            }
+        }
+
+        Ok(&self.buffer)
+    }
+
+    /// Marks `cnt` bytes of the buffer as consumed.
+    ///
+    /// `cnt` must not exceed the length of the slice last returned by
+    /// `fill_buf`; `Bytes::advance` panics otherwise.
+    fn consume(&mut self, cnt: usize) {
+        self.buffer.advance(cnt);
+    }
+}
+
+/// Identifies one compressed NAR object in the bucket.
+struct FileKey {
+    /// Hash of the file; its nixbase32 encoding forms the object name.
+    file_hash: [u8; 32],
+    /// Compression suffix appended after `.nar.` in the object name.
+    extension: &'static str,
+}
+
+impl FileKey {
+    /// Builds a request for the object `nar/<nixbase32(file_hash)>.nar.<extension>`,
+    /// covering everything from byte `offset` (zero-based) to the end.
+    ///
+    /// When `e_tag` is given it is sent as an `If-Match` precondition.
+    ///
+    /// The returned future does not borrow `self`. Request failures are
+    /// wrapped in an [`io::Error`] of kind `Other`.
+    fn get(
+        &self,
+        offset: u64,
+        e_tag: Option<&str>,
+    ) -> impl Future<Output = io::Result<GetObjectOutput>> + Send + 'static {
+        let input = GetObjectRequest {
+            bucket: BUCKET.to_string(),
+            key: format!(
+                "nar/{}.nar.{}",
+                nixbase32::encode(&self.file_hash),
+                self.extension
+            ),
+            if_match: e_tag.map(str::to_owned),
+            // HTTP byte ranges are written `bytes=<first>-` and are zero-based,
+            // so resuming after `offset` received bytes starts at `offset`.
+            range: Some(format!("bytes={offset}-")),
+            ..Default::default()
+        };
+
+        async move {
+            S3_CLIENT
+                .get_object(input)
+                .await
+                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
+        }
+    }
+}
+
+/// Stream over the bytes of one object, which issues a new ranged request
+/// whenever the current response body ends early or fails.
+struct FileStream {
+    /// Object being read; used to issue follow-up requests.
+    key: FileKey,
+    /// ETag from the initial response, passed along with every follow-up request.
+    e_tag: String,
+    /// Number of bytes yielded so far.
+    offset: u64,
+    /// Content length reported by the initial response; the stream ends once
+    /// `offset` reaches it.
+    length: u64,
+    /// Current state of the request/response state machine.
+    inner: FileStreamState,
+}
+
+/// States of the [`FileStream`] state machine.
+enum FileStreamState {
+    /// A follow-up request is in flight; its response has not arrived yet.
+    Response(BoxFuture<'static, io::Result<GetObjectOutput>>),
+    /// A response body is being read chunk by chunk.
+    Body(ByteStream),
+    /// Terminal state: the stream yields nothing further.
+    Eof,
+}
+
+impl FileStream {
+    /// Issues the initial request for `key` and returns a stream positioned at
+    /// the start of the object.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the request fails, or an error of kind
+    /// `InvalidData` if the response lacks an ETag, a content length, or a
+    /// body, or reports a negative content length.
+    pub async fn new(key: FileKey) -> io::Result<Self> {
+        /// Error for a response field this stream cannot work without.
+        fn missing(field: &str) -> io::Error {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                format!("S3 GetObject response is missing {field}"),
+            )
+        }
+
+        let resp = key.get(0, None).await?;
+
+        let e_tag = resp.e_tag.ok_or_else(|| missing("ETag"))?;
+        let length = resp
+            .content_length
+            .ok_or_else(|| missing("Content-Length"))?;
+        let length = u64::try_from(length)
+            .map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
+        let body = resp.body.ok_or_else(|| missing("body"))?;
+
+        Ok(FileStream {
+            key,
+            e_tag,
+            offset: 0,
+            length,
+            inner: FileStreamState::Body(body),
+        })
+    }
+}
+
+/// Unwraps a `Poll::Ready` value, or returns `Poll::Pending` from the
+/// enclosing function if the expression is not ready yet.
+macro_rules! poll {
+    ($expr:expr) => {
+        match $expr {
+            Poll::Ready(value) => value,
+            Poll::Pending => return Poll::Pending,
+        }
+    };
+}
+
+impl Stream for FileStream {
+    type Item = io::Result<Bytes>;
+
+    /// Yields the next chunk of the object.
+    ///
+    /// State transitions:
+    /// - `Body`: a chunk is yielded as-is. If the body ends or fails, a new
+    ///   request starting at the current `offset` is issued, with the stored
+    ///   ETag as precondition, and the state becomes `Response`. Body errors
+    ///   are not surfaced, and the number of retries is not bounded.
+    /// - `Response`: on success the new body is read; on failure, or if the
+    ///   response carries no body, the error is yielded and the stream ends.
+    /// - `Eof`: the stream yields `None`.
+    ///
+    /// After each chunk `offset` is advanced, and the state becomes `Eof`
+    /// once `offset` reaches `length`.
+    fn poll_next(self: Pin<&mut Self>, cx: &mut task::Context) -> Poll<Option<Self::Item>> {
+        let this = self.get_mut();
+
+        let chunk = loop {
+            match &mut this.inner {
+                FileStreamState::Response(resp) => match poll!(resp.poll_unpin(cx)) {
+                    Err(err) => {
+                        this.inner = FileStreamState::Eof;
+                        return Poll::Ready(Some(Err(err)));
+                    }
+                    Ok(resp) => match resp.body {
+                        Some(body) => {
+                            this.inner = FileStreamState::Body(body);
+                        }
+                        None => {
+                            // Report a body-less response instead of panicking.
+                            this.inner = FileStreamState::Eof;
+                            return Poll::Ready(Some(Err(io::Error::new(
+                                io::ErrorKind::InvalidData,
+                                "S3 GetObject response is missing body",
+                            ))));
+                        }
+                    },
+                },
+                FileStreamState::Body(body) => match poll!(body.poll_next_unpin(cx)) {
+                    // Premature end or transport error: resume from the
+                    // current offset with a fresh request.
+                    None | Some(Err(_)) => {
+                        this.inner = FileStreamState::Response(
+                            this.key.get(this.offset, Some(&this.e_tag)).boxed(),
+                        );
+                    }
+                    Some(Ok(chunk)) => {
+                        break chunk;
+                    }
+                },
+                FileStreamState::Eof => {
+                    return Poll::Ready(None);
+                }
+            }
+        };
+
+        this.offset += chunk.len() as u64;
+
+        if this.offset >= this.length {
+            this.inner = FileStreamState::Eof;
+        }
+
+        Poll::Ready(Some(Ok(chunk)))
+    }
+}