From 3c340b28bdc812b9c13393c9e2bb378ba7dd9ec8 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 2 Sep 2023 21:16:35 +0300 Subject: refactor(tvix/{cli,store}): move TvixStoreIO to tvix-cli crate This trait is eval-specific, there's no point in dealing with these things in tvix-store. This implements the EvalIO interface for a Tvix store. The proper place for this glue code (for now) is tvix-cli, which knows about both tvix-store and tvix-eval. There's one annoyance with this move: The `tvix-store import` subcommand previously also used the TvixStoreIO implementation (because it conveniently did what we wanted). Some of this code had to be duplicated, mostly logic to calculate the NAR-based output path and create the PathInfo object. Some, but potentially more of this can be extracted into helper functions in a shared crate, and then be used from both TvixStoreIO in tvix-cli as well as the tvix-store CLI entrypoint. Change-Id: Ia7515e83c1b54f95baf810fbd8414c5521382d40 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9212 Tested-by: BuildkiteCI Reviewed-by: tazjin Autosubmit: flokli --- tvix/cli/src/main.rs | 4 +- tvix/cli/src/tvix_io.rs | 1 + tvix/cli/src/tvix_store_io.rs | 322 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 tvix/cli/src/tvix_store_io.rs (limited to 'tvix/cli/src') diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs index 5a7342e76593..1980ac1731e7 100644 --- a/tvix/cli/src/main.rs +++ b/tvix/cli/src/main.rs @@ -3,6 +3,7 @@ mod errors; mod known_paths; mod refscan; mod tvix_io; +mod tvix_store_io; use std::cell::RefCell; use std::rc::Rc; @@ -17,6 +18,7 @@ use tvix_eval::Value; use tvix_store::blobservice::MemoryBlobService; use tvix_store::directoryservice::MemoryDirectoryService; use tvix_store::pathinfoservice::MemoryPathInfoService; +use tvix_store_io::TvixStoreIO; #[derive(Parser)] struct Args { @@ -80,7 +82,7 @@ fn interpret(code: &str, path: Option, args: &Args, explain: bool) -> b eval.io_handle = Box::new(tvix_io::TvixIO::new( known_paths.clone(), - tvix_store::TvixStoreIO::new(blob_service, directory_service, path_info_service), + TvixStoreIO::new(blob_service, directory_service, path_info_service), )); // bundle fetchurl.nix (used in nixpkgs) by resolving to diff --git a/tvix/cli/src/tvix_io.rs b/tvix/cli/src/tvix_io.rs index b37dfc6a66a3..74f91138bff8 100644 --- a/tvix/cli/src/tvix_io.rs +++ b/tvix/cli/src/tvix_io.rs @@ -15,6 +15,7 @@ use std::path::{Path, PathBuf}; use std::rc::Rc; use tvix_eval::{EvalIO, FileType}; +// TODO: Merge this together with TvixStoreIO? pub(crate) struct TvixIO { /// Ingested paths must be reported to this known paths tracker /// for accurate build reference scanning. diff --git a/tvix/cli/src/tvix_store_io.rs b/tvix/cli/src/tvix_store_io.rs new file mode 100644 index 000000000000..2c673385342f --- /dev/null +++ b/tvix/cli/src/tvix_store_io.rs @@ -0,0 +1,322 @@ +//! This module provides an implementation of EvalIO talking to tvix-store. + +use nix_compat::store_path::{self, StorePath}; +use std::{io, path::Path, path::PathBuf, sync::Arc}; +use tracing::{error, instrument, warn}; +use tvix_eval::{EvalIO, FileType, StdIO}; + +use tvix_store::{ + blobservice::BlobService, + directoryservice::{self, DirectoryService}, + import, + nar::calculate_size_and_sha256, + pathinfoservice::PathInfoService, + proto::{node::Node, NamedNode, NarInfo, PathInfo}, + B3Digest, +}; + +/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService] +/// and [BlobService]. +/// +/// In case the given path does not exist in these stores, we ask StdIO. +/// This is to both cover cases of syntactically valid store paths, that exist +/// on the filesystem (still managed by Nix), as well as being able to read +/// files outside store paths. +pub struct TvixStoreIO { + blob_service: Arc, + directory_service: Arc, + path_info_service: Arc, + std_io: StdIO, +} + +impl TvixStoreIO { + pub fn new( + blob_service: Arc, + directory_service: Arc, + path_info_service: Arc, + ) -> Self { + Self { + blob_service, + directory_service, + path_info_service, + std_io: StdIO {}, + } + } + + /// for a given [StorePath] and additional [Path] inside the store path, + /// look up the [PathInfo], and if it exists, and then use + /// [directoryservice::traverse_to] to return the + /// [Node] specified by `sub_path`. + #[instrument(skip(self), ret, err)] + fn store_path_to_root_node( + &self, + store_path: &StorePath, + sub_path: &Path, + ) -> Result, io::Error> { + let path_info = { + match self.path_info_service.get(store_path.digest)? { + // If there's no PathInfo found, early exit + None => return Ok(None), + Some(path_info) => path_info, + } + }; + + let root_node = { + match path_info.node { + None => { + warn!( + "returned PathInfo {:?} node is None, this shouldn't happen.", + &path_info + ); + return Ok(None); + } + Some(root_node) => match root_node.node { + None => { + warn!("node for {:?} is None, this shouldn't happen.", &root_node); + return Ok(None); + } + Some(root_node) => root_node, + }, + } + }; + + Ok(directoryservice::traverse_to( + self.directory_service.clone(), + root_node, + sub_path, + )?) + } + + /// Imports a given path on the filesystem into the store, and returns the + /// [PathInfo] describing the path, that was sent to + /// [PathInfoService]. + /// While not part of the [EvalIO], it's still useful for clients who + /// care about the [PathInfo]. + #[instrument(skip(self), ret, err)] + pub fn import_path_with_pathinfo(&self, path: &std::path::Path) -> Result { + // Call [import::ingest_path], which will walk over the given path and return a root_node. + let root_node = import::ingest_path( + self.blob_service.clone(), + self.directory_service.clone(), + path, + ) + .expect("error during import_path"); + + // Render the NAR + let (nar_size, nar_sha256) = calculate_size_and_sha256( + &root_node, + self.blob_service.clone(), + self.directory_service.clone(), + ) + .expect("error during nar calculation"); // TODO: handle error + + // TODO: make a path_to_name helper function? + let name = path + .file_name() + .expect("path must not be ..") + .to_str() + .expect("path must be valid unicode"); + + let output_path = store_path::build_nar_based_store_path(&nar_sha256, name); + + // assemble a new root_node with a name that is derived from the nar hash. + let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + + // assemble the [PathInfo] object. + let path_info = PathInfo { + node: Some(tvix_store::proto::Node { + node: Some(root_node), + }), + // There's no reference scanning on path contents ingested like this. + references: vec![], + narinfo: Some(NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + // TODO: narinfo for talosctl.src contains `CA: fixed:r:sha256:1x13j5hy75221bf6kz7cpgld9vgic6bqx07w5xjs4pxnksj6lxb6` + // do we need this anywhere? + }), + }; + + // put into [PathInfoService], and return the [PathInfo] that we get + // back from there (it might contain additional signatures). + let path_info = self.path_info_service.put(path_info)?; + + Ok(path_info) + } +} + +impl EvalIO for TvixStoreIO { + #[instrument(skip(self), ret, err)] + fn path_exists(&self, path: &Path) -> Result { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if self + .store_path_to_root_node(&store_path, &sub_path)? + .is_some() + { + Ok(true) + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.path_exists(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.path_exists(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_to_string(&self, path: &Path) -> Result { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + // depending on the node type, treat read_to_string differently + match node { + Node::Directory(_) => { + // This would normally be a io::ErrorKind::IsADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("tried to read directory at {:?} to string", path), + )) + } + Node::File(file_node) => { + let digest: B3Digest = + file_node.digest.clone().try_into().map_err(|_e| { + error!( + file_node = ?file_node, + "invalid digest" + ); + io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid digest length in file node: {:?}", file_node), + ) + })?; + + let reader = { + let resp = self.blob_service.open_read(&digest)?; + match resp { + Some(blob_reader) => blob_reader, + None => { + error!( + blob.digest = %digest, + "blob not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("blob {} not found", &digest), + ))? + } + } + }; + + io::read_to_string(reader) + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_to_string for symlinks is unsupported", + ))?, + } + } else { + // As tvix-store doesn't manage /nix/store on the filesystem, + // we still need to also ask self.std_io here. + self.std_io.read_to_string(path) + } + } else { + // The store path is no store path, so do regular StdIO. + self.std_io.read_to_string(path) + } + } + + #[instrument(skip(self), ret, err)] + fn read_dir(&self, path: &Path) -> Result, io::Error> { + if let Ok((store_path, sub_path)) = + StorePath::from_absolute_path_full(&path.to_string_lossy()) + { + if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? { + match node { + Node::Directory(directory_node) => { + // fetch the Directory itself. + let digest = directory_node.digest.clone().try_into().map_err(|_e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!( + "invalid digest length in directory node: {:?}", + directory_node + ), + ) + })?; + + if let Some(directory) = self.directory_service.get(&digest)? { + let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new(); + for node in directory.nodes() { + children.push(match node { + Node::Directory(e) => (e.name, FileType::Directory), + Node::File(e) => (e.name, FileType::Regular), + Node::Symlink(e) => (e.name, FileType::Symlink), + }) + } + Ok(children) + } else { + // If we didn't get the directory node that's linked, that's a store inconsistency! + error!( + directory.digest = %digest, + path = ?path, + "directory not found", + ); + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("directory {digest} does not exist"), + ))? + } + } + Node::File(_file_node) => { + // This would normally be a io::ErrorKind::NotADirectory (still unstable) + Err(io::Error::new( + io::ErrorKind::Unsupported, + "tried to readdir path {:?}, which is a file", + ))? + } + Node::Symlink(_symlink_node) => Err(io::Error::new( + io::ErrorKind::Unsupported, + "read_dir for symlinks is unsupported", + ))?, + } + } else { + self.std_io.read_dir(path) + } + } else { + self.std_io.read_dir(path) + } + } + + #[instrument(skip(self), ret, err)] + fn import_path(&self, path: &std::path::Path) -> Result { + let path_info = self.import_path_with_pathinfo(path)?; + + // from the [PathInfo], extract the store path (as string). + Ok({ + let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH); + + let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec(); + + // This must be a string, otherwise it would have failed validation. + let root_node_name = String::from_utf8(root_node_name).unwrap(); + + // append to the PathBuf + path.push(root_node_name); + + // and return it + path + }) + } + + #[instrument(skip(self), ret)] + fn store_dir(&self) -> Option { + Some("/nix/store".to_string()) + } +} -- cgit 1.4.1