about summary refs log tree commit diff
path: root/tvix/castore/src/import
diff options
context:
space:
mode:
authorYureka <tvl@yuka.dev>2024-07-29T12·34+0200
committeryuka <tvl@yuka.dev>2024-08-13T12·17+0000
commit3ca0b53840b352b24f3a315404df11458b0bdbbb (patch)
tree2635e8d67d0c334cc5760dc4205b7515c6283a77 /tvix/castore/src/import
parent5d3f3158d6102baee48d2772e85f05cfc1fac95e (diff)
refactor(tvix/castore): use Directory struct separate from proto one r/8484
This uses our own data type to deal with Directories in the castore model.

It makes some undesired states unrepresentable, removing the need for conversions and checking in various places:

 - In the protobuf, blake3 digests could have a wrong length, as proto doesn't know fixed-size fields. We now use `B3Digest`, which makes cloning cheaper, and removes the need to do size-checking everywhere.
 - In the protobuf, we had three different lists for `files`, `symlinks` and `directories`. This was mostly a protobuf size optimization, but made interacting with them a bit awkward. This has now been replaced with a list of enums, and convenience iterators to get various nodes, and add new ones.

Change-Id: I7b92691bb06d77ff3f58a5ccea94a22c16f84f04
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12057
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix/castore/src/import')
-rw-r--r--tvix/castore/src/import/archive.rs3
-rw-r--r--tvix/castore/src/import/fs.rs3
-rw-r--r--tvix/castore/src/import/mod.rs84
3 files changed, 49 insertions, 41 deletions
diff --git a/tvix/castore/src/import/archive.rs b/tvix/castore/src/import/archive.rs
index cd5b1290e031..930f33f68b46 100644
--- a/tvix/castore/src/import/archive.rs
+++ b/tvix/castore/src/import/archive.rs
@@ -11,9 +11,8 @@ use tokio_tar::Archive;
 use tracing::{instrument, warn, Level};
 
 use crate::blobservice::BlobService;
-use crate::directoryservice::DirectoryService;
+use crate::directoryservice::{DirectoryService, Node};
 use crate::import::{ingest_entries, IngestionEntry, IngestionError};
-use crate::proto::node::Node;
 
 use super::blobs::{self, ConcurrentBlobUploader};
 
diff --git a/tvix/castore/src/import/fs.rs b/tvix/castore/src/import/fs.rs
index dc7821b8101e..f156c8a73527 100644
--- a/tvix/castore/src/import/fs.rs
+++ b/tvix/castore/src/import/fs.rs
@@ -15,8 +15,7 @@ use walkdir::DirEntry;
 use walkdir::WalkDir;
 
 use crate::blobservice::BlobService;
-use crate::directoryservice::DirectoryService;
-use crate::proto::node::Node;
+use crate::directoryservice::{DirectoryService, Node};
 use crate::B3Digest;
 
 use super::ingest_entries;
diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs
index a9ac0be6b064..03e2b6c7db41 100644
--- a/tvix/castore/src/import/mod.rs
+++ b/tvix/castore/src/import/mod.rs
@@ -4,14 +4,14 @@
 //! Specific implementations, such as ingesting from the filesystem, live in
 //! child modules.
 
+use crate::directoryservice::Directory;
+use crate::directoryservice::DirectoryNode;
 use crate::directoryservice::DirectoryPutter;
 use crate::directoryservice::DirectoryService;
+use crate::directoryservice::FileNode;
+use crate::directoryservice::Node;
+use crate::directoryservice::SymlinkNode;
 use crate::path::{Path, PathBuf};
-use crate::proto::node::Node;
-use crate::proto::Directory;
-use crate::proto::DirectoryNode;
-use crate::proto::FileNode;
-use crate::proto::SymlinkNode;
 use crate::B3Digest;
 use futures::{Stream, StreamExt};
 use tracing::Level;
@@ -98,27 +98,36 @@ where
                         IngestionError::UploadDirectoryError(entry.path().to_owned(), e)
                     })?;
 
-                Node::Directory(DirectoryNode {
-                    name,
-                    digest: directory_digest.into(),
-                    size: directory_size,
-                })
+                Node::Directory(
+                    DirectoryNode::new(name, directory_digest, directory_size).map_err(|e| {
+                        IngestionError::UploadDirectoryError(
+                            entry.path().to_owned(),
+                            crate::Error::StorageError(e.to_string()),
+                        )
+                    })?,
+                )
             }
-            IngestionEntry::Symlink { ref target, .. } => Node::Symlink(SymlinkNode {
-                name,
-                target: target.to_owned().into(),
-            }),
+            IngestionEntry::Symlink { ref target, .. } => Node::Symlink(
+                SymlinkNode::new(name, target.to_owned().into()).map_err(|e| {
+                    IngestionError::UploadDirectoryError(
+                        entry.path().to_owned(),
+                        crate::Error::StorageError(e.to_string()),
+                    )
+                })?,
+            ),
             IngestionEntry::Regular {
                 size,
                 executable,
                 digest,
                 ..
-            } => Node::File(FileNode {
-                name,
-                digest: digest.to_owned().into(),
-                size: *size,
-                executable: *executable,
-            }),
+            } => Node::File(
+                FileNode::new(name, digest.clone(), *size, *executable).map_err(|e| {
+                    IngestionError::UploadDirectoryError(
+                        entry.path().to_owned(),
+                        crate::Error::StorageError(e.to_string()),
+                    )
+                })?,
+            ),
         };
 
         let parent = entry
@@ -130,7 +139,16 @@ where
             break node;
         } else {
             // record node in parent directory, creating a new [Directory] if not there yet.
-            directories.entry(parent.to_owned()).or_default().add(node);
+            directories
+                .entry(parent.to_owned())
+                .or_default()
+                .add(node)
+                .map_err(|e| {
+                    IngestionError::UploadDirectoryError(
+                        entry.path().to_owned(),
+                        crate::Error::StorageError(e.to_string()),
+                    )
+                })?;
         }
     };
 
@@ -156,14 +174,7 @@ where
         #[cfg(debug_assertions)]
         {
             if let Node::Directory(directory_node) = &root_node {
-                debug_assert_eq!(
-                    root_directory_digest,
-                    directory_node
-                        .digest
-                        .to_vec()
-                        .try_into()
-                        .expect("invalid digest len")
-                )
+                debug_assert_eq!(&root_directory_digest, directory_node.digest())
             } else {
                 unreachable!("Tvix bug: directory putter initialized but no root directory node");
             }
@@ -208,9 +219,8 @@ impl IngestionEntry {
 mod test {
     use rstest::rstest;
 
+    use crate::directoryservice::{Directory, DirectoryNode, FileNode, Node, SymlinkNode};
     use crate::fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST};
-    use crate::proto::node::Node;
-    use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode};
     use crate::{directoryservice::MemoryDirectoryService, fixtures::DUMMY_DIGEST};
 
     use super::ingest_entries;
@@ -223,18 +233,18 @@ mod test {
         executable: true,
         digest: DUMMY_DIGEST.clone(),
     }],
-        Node::File(FileNode { name: "foo".into(), digest: DUMMY_DIGEST.clone().into(), size: 42, executable: true }
-    ))]
+        Node::File(FileNode::new("foo".into(), DUMMY_DIGEST.clone(), 42, true).unwrap())
+    )]
     #[case::single_symlink(vec![IngestionEntry::Symlink {
         path: "foo".parse().unwrap(),
         target: b"blub".into(),
     }],
-        Node::Symlink(SymlinkNode { name: "foo".into(), target: "blub".into()})
+        Node::Symlink(SymlinkNode::new("foo".into(), "blub".into()).unwrap())
     )]
     #[case::single_dir(vec![IngestionEntry::Dir {
         path: "foo".parse().unwrap(),
     }],
-        Node::Directory(DirectoryNode { name: "foo".into(), digest: Directory::default().digest().into(), size: Directory::default().size()})
+        Node::Directory(DirectoryNode::new("foo".into(), Directory::default().digest(), Directory::default().size()).unwrap())
     )]
     #[case::dir_with_keep(vec![
         IngestionEntry::Regular {
@@ -247,7 +257,7 @@ mod test {
             path: "foo".parse().unwrap(),
         },
     ],
-        Node::Directory(DirectoryNode { name: "foo".into(), digest: DIRECTORY_WITH_KEEP.digest().into(), size: DIRECTORY_WITH_KEEP.size() })
+        Node::Directory(DirectoryNode::new("foo".into(), DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size()).unwrap())
     )]
     /// This is intentionally a bit unsorted, though it still satisfies all
     /// requirements we have on the order of elements in the stream.
@@ -275,7 +285,7 @@ mod test {
             path: "blub".parse().unwrap(),
         },
     ],
-        Node::Directory(DirectoryNode { name: "blub".into(), digest: DIRECTORY_COMPLICATED.digest().into(), size:DIRECTORY_COMPLICATED.size() })
+    Node::Directory(DirectoryNode::new("blub".into(), DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size()).unwrap())
     )]
     #[tokio::test]
     async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) {