author | Florian Klink <flokli@flokli.de> | 2023-05-11T12·49+0300
---|---|---
committer | flokli <flokli@flokli.de> | 2023-05-11T14·27+0000
commit | 616fa4476f93e1782e68dc713e9e8cb77a426c7d (patch) |
tree | f76a43e95c75d848d079706fbccfd442210ebebc /tvix/store/src/import.rs |
parent | b22b685f0b2524c088deacbf4e80e7b7c73b5afc (diff) |
refactor(tvix/store): remove ChunkService r/6133
Whether chunking is involved or not is an implementation detail of each Blobstore. Consumers of a whole blob shouldn't need to worry about that. It currently is not visible in the gRPC interface either. It shouldn't bleed into everything.

Let the BlobService trait provide `open_read` and `open_write` methods, which return handles providing io::Read or io::Write, and leave the details up to the implementation. This means our custom BlobReader module can go away, and all the chunking bits in there, too.

In the future, we might still want to add more chunking-aware syncing, but as a syncing strategy some stores can expose, not as a fundamental protocol component.

This currently needs "SyncReadIntoAsyncRead", taken and vendored in from https://github.com/tokio-rs/tokio/pull/5669. It provides an AsyncRead for a sync Read, which is necessary to connect our (sync) BlobReader interface to a gRPC server implementation.

As an alternative, we could also make the BlobReader itself async, and let consumers of the trait (EvalIO) deal with the async-ness, but this is less of a change for now.

In terms of vendoring, I initially tried to move our tokio crate to these commits, but ended up in version incompatibilities, so let's vendor it in for now.

Change-Id: I5969ebbc4c0e1ceece47981be3b9e7cfb3f59ad0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8551
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
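For illustration, the trait shape implied by the message and by the diff below might look roughly like this. This is a minimal sketch: the names `BlobService`, `BlobWriter`, `open_read`, `open_write` and `close` come from the commit, while the associated types, the digest representation and the use of `io::Result` are assumptions, not the actual tvix definitions.

```rust
use std::io;

/// Write handle for a blob; `close` finalizes the upload and yields
/// the blob's digest.
pub trait BlobWriter: io::Write {
    type Digest;
    fn close(self) -> io::Result<Self::Digest>;
}

/// A store for blobs. Whether an implementation chunks blob contents
/// internally is its own business and is never visible to callers.
pub trait BlobService {
    type Reader: io::Read;
    type Writer: BlobWriter;

    /// Open a blob for reading, if it exists in this store.
    fn open_read(&self, digest: &[u8]) -> io::Result<Option<Self::Reader>>;

    /// Open a fresh blob for writing.
    fn open_write(&mut self) -> io::Result<Self::Writer>;
}
```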
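The "SyncReadIntoAsyncRead" adapter mentioned above is vendored from the linked tokio PR. The sketch below is not that code, only the same idea expressed with public tokio/tokio-util APIs: blocking reads run on tokio's blocking thread pool and are forwarded through a channel that an async `StreamReader` consumes. Buffer size and channel depth here are arbitrary choices.

```rust
use std::io::Read;

use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tokio_util::io::StreamReader;

// Sketch of a sync-Read-to-AsyncRead bridge; not the vendored
// SyncReadIntoAsyncRead implementation.
fn sync_to_async_read<R>(mut reader: R) -> impl tokio::io::AsyncRead
where
    R: Read + Send + 'static,
{
    let (tx, rx) = mpsc::channel::<std::io::Result<bytes::Bytes>>(1);

    // Drive the blocking reads on tokio's blocking thread pool so
    // they never stall the async executor.
    tokio::task::spawn_blocking(move || {
        let mut buf = [0u8; 8192];
        loop {
            match reader.read(&mut buf) {
                Ok(0) => break, // EOF
                Ok(n) => {
                    let chunk = bytes::Bytes::copy_from_slice(&buf[..n]);
                    if tx.blocking_send(Ok(chunk)).is_err() {
                        break; // receiver side was dropped
                    }
                }
                Err(e) => {
                    let _ = tx.blocking_send(Err(e));
                    break;
                }
            }
        }
    });

    // StreamReader turns a Stream of io::Result<Bytes> back into an
    // AsyncRead that a gRPC server implementation can consume.
    StreamReader::new(ReceiverStream::new(rx))
}
```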
Diffstat (limited to 'tvix/store/src/import.rs')
-rw-r--r-- | tvix/store/src/import.rs | 54 |
1 file changed, 16 insertions, 38 deletions
```diff
diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs
index e62097ec468d..bf80eb4b71b9 100644
--- a/tvix/store/src/import.rs
+++ b/tvix/store/src/import.rs
@@ -1,19 +1,17 @@
-use crate::{chunkservice::read_all_and_chunk, directoryservice::DirectoryPutter, proto};
+use crate::{blobservice::BlobService, directoryservice::DirectoryService};
+use crate::{blobservice::BlobWriter, directoryservice::DirectoryPutter, proto};
 use std::{
     collections::HashMap,
     fmt::Debug,
     fs,
     fs::File,
+    io,
     os::unix::prelude::PermissionsExt,
     path::{Path, PathBuf},
 };
 use tracing::instrument;
 use walkdir::WalkDir;
 
-use crate::{
-    blobservice::BlobService, chunkservice::ChunkService, directoryservice::DirectoryService,
-};
-
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
     #[error("failed to upload directory at {0}: {1}")]
@@ -57,9 +55,8 @@ impl From<super::Error> for Error {
 //
 // It assumes the caller adds returned nodes to the directories it assembles.
 #[instrument(skip_all, fields(entry.file_type=?&entry.file_type(),entry.path=?entry.path()))]
-fn process_entry<BS: BlobService, CS: ChunkService + std::marker::Sync, DP: DirectoryPutter>(
+fn process_entry<BS: BlobService, DP: DirectoryPutter>(
     blob_service: &mut BS,
-    chunk_service: &mut CS,
     directory_putter: &mut DP,
     entry: &walkdir::DirEntry,
     maybe_directory: Option<proto::Directory>,
@@ -112,23 +109,16 @@ fn process_entry<BS: BlobService, CS: ChunkService + std::marker::Sync, DP: Dire
                 .metadata()
                 .map_err(|e| Error::UnableToStat(entry_path.clone(), e.into()))?;
 
-            let file = File::open(entry_path.clone())
+            let mut file = File::open(entry_path.clone())
                 .map_err(|e| Error::UnableToOpen(entry_path.clone(), e))?;
 
-            let (blob_digest, blob_meta) = read_all_and_chunk(chunk_service, file)?;
-
-            // upload blobmeta if not there yet
-            if blob_service
-                .stat(&proto::StatBlobRequest {
-                    digest: blob_digest.to_vec(),
-                    include_chunks: false,
-                    include_bao: false,
-                })?
-                .is_none()
-            {
-                // upload blobmeta
-                blob_service.put(&blob_digest, blob_meta)?;
-            }
+            let mut writer = blob_service.open_write()?;
+
+            if let Err(e) = io::copy(&mut file, &mut writer) {
+                return Err(Error::UnableToRead(entry_path, e));
+            };
+
+            let digest = writer.close()?;
 
             return Ok(proto::node::Node::File(proto::FileNode {
                 name: entry
@@ -136,7 +126,7 @@ fn process_entry<BS: BlobService, CS: ChunkService + std::marker::Sync, DP: Dire
                     .to_str()
                     .map(|s| Ok(s.to_owned()))
                     .unwrap_or(Err(Error::InvalidEncoding(entry.path().to_path_buf())))?,
-                digest: blob_digest,
+                digest: digest.to_vec(),
                 size: metadata.len() as u32,
                 // If it's executable by the user, it'll become executable.
                 // This matches nix's dump() function behaviour.
@@ -152,15 +142,9 @@ fn process_entry<BS: BlobService, CS: ChunkService + std::marker::Sync, DP: Dire
 /// to the PathInfoService.
 //
 // returns the root node, or an error.
-#[instrument(skip(blob_service, chunk_service, directory_service), fields(path=?p))]
-pub fn import_path<
-    BS: BlobService,
-    CS: ChunkService + std::marker::Sync,
-    DS: DirectoryService,
-    P: AsRef<Path> + Debug,
->(
+#[instrument(skip(blob_service, directory_service), fields(path=?p))]
+pub fn import_path<BS: BlobService, DS: DirectoryService, P: AsRef<Path> + Debug>(
     blob_service: &mut BS,
-    chunk_service: &mut CS,
     directory_service: &mut DS,
     p: P,
 ) -> Result<proto::node::Node, Error> {
@@ -212,13 +196,7 @@ pub fn import_path<
         }
     };
 
-    let node = process_entry(
-        blob_service,
-        chunk_service,
-        &mut directory_putter,
-        &entry,
-        maybe_directory,
-    )?;
+    let node = process_entry(blob_service, &mut directory_putter, &entry, maybe_directory)?;
 
     if entry.depth() == 0 {
         return Ok(node);
```
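The diff only exercises the write path (`open_write`, `io::copy`, `close`). For completeness, a reader-side counterpart under the same assumptions as the trait sketch above might look like this; `fetch_blob` is a hypothetical helper invented for illustration, not a function from the tvix tree.

```rust
use std::io::{self, Read};

// Hypothetical helper: fetch a whole blob through the sketched
// BlobService trait. Returns Ok(None) if the store has no blob
// with this digest.
fn fetch_blob<BS: BlobService>(
    blob_service: &BS,
    digest: &[u8],
) -> io::Result<Option<Vec<u8>>> {
    match blob_service.open_read(digest)? {
        None => Ok(None),
        Some(mut reader) => {
            let mut buf = Vec::new();
            // The store may chunk internally; the reader hides that.
            reader.read_to_end(&mut buf)?;
            Ok(Some(buf))
        }
    }
}
```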