From b74ffda583d40db8b94b58418f70430687b2eaf0 Mon Sep 17 00:00:00 2001
From: Florian Klink
Date: Sun, 12 Feb 2023 12:03:26 +0100
Subject: feat(tvix/store): add blobservice

This adds a BlobService trait, and an implementation for it using sled,
and one using a HashMap.

Change-Id: Id6bc1b629195d0b26fc503bd7d2dc9e43c41c317
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8087
Tested-by: BuildkiteCI
Reviewed-by: tazjin
---
 tvix/store/src/blobservice/memory.rs | 59 ++++++++++++++++++++++++++++++++
 tvix/store/src/blobservice/mod.rs    | 20 +++++++++++
 tvix/store/src/blobservice/sled.rs   | 66 ++++++++++++++++++++++++++++++++++++
 tvix/store/src/lib.rs                |  1 +
 4 files changed, 146 insertions(+)
 create mode 100644 tvix/store/src/blobservice/memory.rs
 create mode 100644 tvix/store/src/blobservice/mod.rs
 create mode 100644 tvix/store/src/blobservice/sled.rs

diff --git a/tvix/store/src/blobservice/memory.rs b/tvix/store/src/blobservice/memory.rs
new file mode 100644
index 000000000000..73028f8f3c63
--- /dev/null
+++ b/tvix/store/src/blobservice/memory.rs
@@ -0,0 +1,59 @@
+use data_encoding::BASE64;
+use std::{
+    collections::HashMap,
+    sync::{Arc, RwLock},
+};
+use tracing::instrument;
+
+use crate::{proto, Error};
+
+use super::BlobService;
+
+#[derive(Clone)]
+pub struct MemoryBlobService {
+    db: Arc<RwLock<HashMap<Vec<u8>, proto::BlobMeta>>>,
+}
+
+impl MemoryBlobService {
+    pub fn new() -> Self {
+        let db = Arc::new(RwLock::new(HashMap::default()));
+
+        Self { db }
+    }
+}
+
+impl BlobService for MemoryBlobService {
+    #[instrument(skip(self, req), fields(blob.digest=BASE64.encode(&req.digest)))]
+    fn stat(&self, req: &proto::StatBlobRequest) -> Result<Option<proto::BlobMeta>, Error> {
+        if req.include_bao {
+            todo!("not implemented yet")
+        }
+
+        let db = self.db.read().unwrap();
+        // if include_chunks is also false, the user only wants to know if the
+        // blob is present at all.
+        if !req.include_chunks {
+            Ok(if db.contains_key(&req.digest) {
+                Some(proto::BlobMeta::default())
+            } else {
+                None
+            })
+        } else {
+            match db.get(&req.digest) {
+                None => Ok(None),
+                Some(blob_meta) => Ok(Some(blob_meta.clone())),
+            }
+        }
+    }
+
+    #[instrument(skip(self, blob_meta, blob_digest), fields(blob.digest = BASE64.encode(blob_digest)))]
+    fn put(&self, blob_digest: &[u8], blob_meta: proto::BlobMeta) -> Result<(), Error> {
+        let mut db = self.db.write().unwrap();
+
+        db.insert(blob_digest.to_vec(), blob_meta);
+
+        Ok(())
+        // TODO: make sure all callers make sure the chunks exist.
+        // TODO: where should we calculate the bao?
+    }
+}
diff --git a/tvix/store/src/blobservice/mod.rs b/tvix/store/src/blobservice/mod.rs
new file mode 100644
index 000000000000..53e941795e7e
--- /dev/null
+++ b/tvix/store/src/blobservice/mod.rs
@@ -0,0 +1,20 @@
+use crate::{proto, Error};
+
+mod memory;
+mod sled;
+
+pub use self::memory::MemoryBlobService;
+pub use self::sled::SledBlobService;
+
+/// The base trait all BlobService services need to implement.
+/// It provides information about how a blob is chunked,
+/// and allows creating new blobs by creating a BlobMeta (referring to chunks
+/// in a [crate::chunkservice::ChunkService]).
+pub trait BlobService {
+    /// Retrieve chunking information for a given blob
+    fn stat(&self, req: &proto::StatBlobRequest) -> Result<Option<proto::BlobMeta>, Error>;
+
+    /// Insert chunking information for a given blob.
+    /// Implementations SHOULD make sure chunks referred do exist.
+    fn put(&self, blob_digest: &[u8], blob_meta: proto::BlobMeta) -> Result<(), Error>;
+}
diff --git a/tvix/store/src/blobservice/sled.rs b/tvix/store/src/blobservice/sled.rs
new file mode 100644
index 000000000000..729a520ac2d6
--- /dev/null
+++ b/tvix/store/src/blobservice/sled.rs
@@ -0,0 +1,66 @@
+use std::path::PathBuf;
+
+use data_encoding::BASE64;
+use prost::Message;
+use tracing::instrument;
+
+use crate::{proto, Error};
+
+use super::BlobService;
+
+#[derive(Clone)]
+pub struct SledBlobService {
+    db: sled::Db,
+}
+
+impl SledBlobService {
+    pub fn new(p: PathBuf) -> Result<Self, sled::Error> {
+        let config = sled::Config::default().use_compression(true).path(p);
+        let db = config.open()?;
+
+        Ok(Self { db })
+    }
+}
+
+impl BlobService for SledBlobService {
+    #[instrument(name = "SledBlobService::stat", skip(self, req), fields(blob.digest=BASE64.encode(&req.digest)))]
+    fn stat(&self, req: &proto::StatBlobRequest) -> Result<Option<proto::BlobMeta>, Error> {
+        if req.include_bao {
+            todo!("not implemented yet")
+        }
+
+        // if include_chunks is also false, the user only wants to know if the
+        // blob is present at all.
+        if !req.include_chunks {
+            match self.db.contains_key(&req.digest) {
+                Ok(false) => Ok(None),
+                Ok(true) => Ok(Some(proto::BlobMeta::default())),
+                Err(e) => Err(Error::StorageError(e.to_string())),
+            }
+        } else {
+            match self.db.get(&req.digest) {
+                Ok(None) => Ok(None),
+                Ok(Some(data)) => match proto::BlobMeta::decode(&*data) {
+                    Ok(blob_meta) => Ok(Some(blob_meta)),
+                    Err(e) => Err(Error::StorageError(format!(
+                        "unable to parse blobmeta message for blob {}: {}",
+                        BASE64.encode(&req.digest),
+                        e
+                    ))),
+                },
+                Err(e) => Err(Error::StorageError(e.to_string())),
+            }
+        }
+    }
+
+    #[instrument(name = "SledBlobService::put", skip(self, blob_meta, blob_digest), fields(blob.digest = BASE64.encode(blob_digest)))]
+    fn put(&self, blob_digest: &[u8], blob_meta: proto::BlobMeta) -> Result<(), Error> {
+        let result = self.db.insert(blob_digest, blob_meta.encode_to_vec());
+        if let Err(e) = result {
+            return Err(Error::StorageError(e.to_string()));
+        }
+        Ok(())
+        // TODO: make sure all callers make sure the chunks exist.
+        // TODO: where should we calculate the bao?
+    }
+}
diff --git a/tvix/store/src/lib.rs b/tvix/store/src/lib.rs
index e4d770c312a9..aa0fcd0c9619 100644
--- a/tvix/store/src/lib.rs
+++ b/tvix/store/src/lib.rs
@@ -2,6 +2,7 @@ pub mod client;
 
 mod errors;
 
+pub mod blobservice;
 pub mod chunkservice;
 pub mod proto;
 
-- 
cgit 1.4.1