From da6cbb4a459d02111c44a67d3d0dd7e654abff23 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 13 Sep 2023 14:20:21 +0200 Subject: refactor(tvix/store/blobsvc): make BlobStore async We previously kept the trait of a BlobService sync. This however had some annoying consequences: - It became more and more complicated to track whether we're in a context with an async runtime or not, producing bugs like https://b.tvl.fyi/issues/304 - The sync trait shielded away async clients from async workloads, requiring manual block_on code inside the gRPC client code, and spawn_blocking calls in consumers of the trait, even if they were async (like the gRPC server) - We had to write our own custom glue code (SyncReadIntoAsyncRead) to convert a sync io::Read into a tokio::io::AsyncRead, which already existed in tokio internally, but upstream is hesitant to expose. This now makes the BlobService trait async (via the async_trait macro, like we already do in various gRPC parts), and replaces the sync readers and writers with their async counterparts. Tests interacting with a BlobService now need to have an async runtime available; the easiest way to do this is to mark the test functions with the tokio::test macro, allowing us to directly .await in the test function. In places where we don't have an async runtime available from context (like tvix-cli), we can pass one down explicitly. Now that we don't provide a sync interface anymore, the (sync) FUSE library now holds a pointer to a tokio runtime handle, and needs to at least have 2 threads available when talking to a blob service (which is why some of the tests now use the multi_thread flavor). The FUSE tests got a bit more verbose, as we couldn't use the setup_and_mount function accepting a callback anymore. We can hopefully move some of the test fixture setup to rstest in the future to make this less repetitive. 
Co-Authored-By: Connor Brewster Change-Id: Ia0501b606e32c852d0108de9c9016b21c94a3c05 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9329 Reviewed-by: Connor Brewster Tested-by: BuildkiteCI Reviewed-by: raitobezarius --- tvix/store/src/blobservice/memory.rs | 45 +++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 13 deletions(-) (limited to 'tvix/store/src/blobservice/memory.rs') diff --git a/tvix/store/src/blobservice/memory.rs b/tvix/store/src/blobservice/memory.rs index 893f27364b80..383127344a17 100644 --- a/tvix/store/src/blobservice/memory.rs +++ b/tvix/store/src/blobservice/memory.rs @@ -1,9 +1,11 @@ -use std::io::{self, Cursor}; +use std::io::{self, Cursor, Write}; +use std::task::Poll; use std::{ collections::HashMap, sync::{Arc, RwLock}, }; -use tracing::{instrument, warn}; +use tonic::async_trait; +use tracing::instrument; use super::{BlobReader, BlobService, BlobWriter}; use crate::{B3Digest, Error}; @@ -13,6 +15,7 @@ pub struct MemoryBlobService { db: Arc>>>, } +#[async_trait] impl BlobService for MemoryBlobService { /// Constructs a [MemoryBlobService] from the passed [url::Url]: /// - scheme has to be `memory://` @@ -31,12 +34,12 @@ impl BlobService for MemoryBlobService { } #[instrument(skip(self, digest), fields(blob.digest=%digest))] - fn has(&self, digest: &B3Digest) -> Result { + async fn has(&self, digest: &B3Digest) -> Result { let db = self.db.read().unwrap(); Ok(db.contains_key(digest)) } - fn open_read(&self, digest: &B3Digest) -> Result>, Error> { + async fn open_read(&self, digest: &B3Digest) -> Result>, Error> { let db = self.db.read().unwrap(); match db.get(digest).map(|x| Cursor::new(x.clone())) { @@ -46,7 +49,7 @@ impl BlobService for MemoryBlobService { } #[instrument(skip(self))] - fn open_write(&self) -> Box { + async fn open_write(&self) -> Box { Box::new(MemoryBlobWriter::new(self.db.clone())) } } @@ -70,9 +73,13 @@ impl MemoryBlobWriter { } } } -impl std::io::Write for MemoryBlobWriter { - fn write(&mut self, b: 
&[u8]) -> std::io::Result { - match &mut self.writers { +impl tokio::io::AsyncWrite for MemoryBlobWriter { + fn poll_write( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + b: &[u8], + ) -> std::task::Poll> { + Poll::Ready(match &mut self.writers { None => Err(io::Error::new( io::ErrorKind::NotConnected, "already closed", @@ -81,22 +88,34 @@ impl std::io::Write for MemoryBlobWriter { let bytes_written = buf.write(b)?; hasher.write(&b[..bytes_written]) } - } + }) } - fn flush(&mut self) -> std::io::Result<()> { - match &mut self.writers { + fn poll_flush( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Poll::Ready(match self.writers { None => Err(io::Error::new( io::ErrorKind::NotConnected, "already closed", )), Some(_) => Ok(()), - } + }) + } + + fn poll_shutdown( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + // shutdown is "instantaneous", we only write to memory. + Poll::Ready(Ok(())) } } +#[async_trait] impl BlobWriter for MemoryBlobWriter { - fn close(&mut self) -> Result { + async fn close(&mut self) -> Result { if self.writers.is_none() { match &self.digest { Some(digest) => Ok(digest.clone()), -- cgit 1.4.1