about summary refs log tree commit diff
path: root/tvix/store/src/proto
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-03-15T23·01+0100
committerflokli <flokli@flokli.de>2023-03-16T13·47+0000
commitee23220564987771c8e7909ded6fb9853f1d1b0d (patch)
treeb1c99097c8912642c9f3582adebff49f9caa3093 /tvix/store/src/proto
parent9c08cbc9732710a0003cb7bbe0ff7a9950fc22b6 (diff)
refactor(tvix/store/directorysvc): use [u8; 32] instead of Vec<u8> r/6014
Also, simplify the trait interface, only allowing lookups of Directory
objects by their digest.

Change-Id: I6eec28a8cb0557bed9b69df8b8ff99a5e0f8fe35
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8313
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'tvix/store/src/proto')
-rw-r--r--tvix/store/src/proto/grpc_directoryservice_wrapper.rs68
-rw-r--r--tvix/store/src/proto/mod.rs4
-rw-r--r--tvix/store/src/proto/tests/directory.rs4
-rw-r--r--tvix/store/src/proto/tests/grpc_directoryservice.rs20
4 files changed, 57 insertions, 39 deletions
diff --git a/tvix/store/src/proto/grpc_directoryservice_wrapper.rs b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs
index 049b1571c23f..e73dfdab1866 100644
--- a/tvix/store/src/proto/grpc_directoryservice_wrapper.rs
+++ b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs
@@ -34,31 +34,33 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
 
         let req_inner = request.into_inner();
 
-        let client = self.directory_service.clone();
+        let directory_service = self.directory_service.clone();
 
         // kick off an async thread
         task::spawn(async move {
             // Keep the list of directory digests to traverse.
             // As per rpc_directory.proto, we traverse in BFS order.
-            let mut deq: VecDeque<Vec<u8>> = VecDeque::new();
+            let mut deq: VecDeque<[u8; 32]> = VecDeque::new();
 
             // look at the digest in the request and put it in the top of the queue.
             match &req_inner.by_what {
                 None => return Err(Status::invalid_argument("by_what needs to be specified")),
                 Some(proto::get_directory_request::ByWhat::Digest(digest)) => {
-                    if digest.len() != 32 {
-                        return Err(Status::invalid_argument("invalid digest length"));
-                    }
-                    deq.push_back(digest.clone());
+                    deq.push_back(
+                        digest
+                            .as_slice()
+                            .try_into()
+                            .map_err(|_e| Status::invalid_argument("invalid digest length"))?,
+                    );
                 }
             }
 
             // keep a list of all the Directory messages already sent, so we can omit sending the same.
-            let mut sent_directory_dgsts: HashSet<Vec<u8>> = HashSet::new();
+            let mut sent_directory_dgsts: HashSet<[u8; 32]> = HashSet::new();
 
             // look up the directory at the top of the queue
-            while let Some(ref digest) = deq.pop_front() {
-                let digest_b64: String = BASE64.encode(digest);
+            while let Some(digest) = deq.pop_front() {
+                let digest_b64: String = BASE64.encode(&digest);
 
                 // add digest we're currently processing to a span, but pay attention to
                 // https://docs.rs/tracing/0.1.37/tracing/span/struct.Span.html#in-asynchronous-code
@@ -69,9 +71,7 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
                     let _enter = span.enter();
 
                     // invoke client.get, and map to a Result<Directory, Status>
-                    match client.get(&proto::get_directory_request::ByWhat::Digest(
-                        digest.to_vec(),
-                    )) {
+                    match directory_service.get(&digest) {
                         // The directory was not found, abort
                         Ok(None) => {
                             if !sent_directory_dgsts.is_empty() {
@@ -94,10 +94,19 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
                             // Same applies to when it already is in the queue.
                             if req_inner.recursive {
                                 for child_directory_node in &directory.directories {
-                                    if !sent_directory_dgsts.contains(&child_directory_node.digest)
-                                        && !deq.contains(&child_directory_node.digest)
+                                    let child_directory_node_digest: [u8; 32] =
+                                        child_directory_node.digest.clone().try_into().map_err(
+                                            |_e| {
+                                                Status::internal(
+                                                    "invalid child directory digest len",
+                                                )
+                                            },
+                                        )?;
+
+                                    if !sent_directory_dgsts.contains(&child_directory_node_digest)
+                                        && !deq.contains(&child_directory_node_digest)
                                     {
-                                        deq.push_back(child_directory_node.digest.clone());
+                                        deq.push_back(child_directory_node_digest);
                                     }
                                 }
                             }
@@ -106,7 +115,7 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
                             // Strictly speaking, it wasn't sent yet, but tx.send happens right after,
                             // and the only way we can still fail is by the remote side to hang up,
                             // in which case we stop anyways.
-                            sent_directory_dgsts.insert(digest.to_vec());
+                            sent_directory_dgsts.insert(digest);
 
                             Ok(directory)
                         }
@@ -143,8 +152,8 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
         // This keeps track of the seen directory keys, and their size.
         // This is used to validate the size field of a reference to a previously sent directory.
         // We don't need to keep the contents around, they're stored in the DB.
-        let mut seen_directories_sizes: HashMap<Vec<u8>, u32> = HashMap::new();
-        let mut last_directory_dgst: Option<Vec<u8>> = None;
+        let mut seen_directories_sizes: HashMap<[u8; 32], u32> = HashMap::new();
+        let mut last_directory_dgst: Option<[u8; 32]> = None;
 
         // Consume directories, and insert them into the store.
         // Reject directory messages that refer to Directories not sent in the same stream.
@@ -162,12 +171,18 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
             // to ensure it has been seen already in this stream, and that the size
             // matches what we recorded.
             for child_directory in &directory.directories {
-                match seen_directories_sizes.get(&child_directory.digest) {
+                let child_directory_digest: [u8; 32] = child_directory
+                    .digest
+                    .clone()
+                    .try_into()
+                    .map_err(|_e| Status::internal("invalid child directory digest len"))?;
+
+                match seen_directories_sizes.get(&child_directory_digest) {
                     None => {
                         return Err(Status::invalid_argument(format!(
                             "child directory '{}' ({}) in directory '{}' not seen yet",
                             child_directory.name,
-                            BASE64.encode(&child_directory.digest),
+                            BASE64.encode(&child_directory_digest),
                             BASE64.encode(&directory.digest()),
                         )));
                     }
@@ -176,7 +191,7 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
                             return Err(Status::invalid_argument(format!(
                                     "child directory '{}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}",
                                     child_directory.name,
-                                    BASE64.encode(&child_directory.digest),
+                                    BASE64.encode(&child_directory_digest),
                                     BASE64.encode(&directory.digest()),
                                     seen_child_directory_size,
                                     child_directory.size,
@@ -197,15 +212,12 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
             // does a reachability check.
 
             let dgst = directory.digest();
-            seen_directories_sizes.insert(dgst.clone(), directory.size());
-            last_directory_dgst = Some(dgst.clone());
+            seen_directories_sizes.insert(dgst, directory.size());
+            last_directory_dgst = Some(dgst);
 
             // check if the directory already exists in the database. We can skip
             // inserting if it's already there, as that'd be a no-op.
-            match self
-                .directory_service
-                .get(&proto::get_directory_request::ByWhat::Digest(dgst.to_vec()))
-            {
+            match self.directory_service.get(&dgst) {
                 Err(e) => {
                     warn!("error checking if directory already exists: {}", e);
                     return Err(e.into());
@@ -224,7 +236,7 @@ impl<DS: DirectoryService + Send + Sync + Clone + 'static>
         match last_directory_dgst {
             None => Err(Status::invalid_argument("no directories received")),
             Some(last_directory_dgst) => Ok(Response::new(proto::PutDirectoryResponse {
-                root_digest: last_directory_dgst,
+                root_digest: last_directory_dgst.to_vec(),
             })),
         }
     }
diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs
index fceb6b5902df..f5945d650ad8 100644
--- a/tvix/store/src/proto/mod.rs
+++ b/tvix/store/src/proto/mod.rs
@@ -236,10 +236,10 @@ impl Directory {
 
     /// Calculates the digest of a Directory, which is the blake3 hash of a
     /// Directory protobuf message, serialized in protobuf canonical form.
-    pub fn digest(&self) -> Vec<u8> {
+    pub fn digest(&self) -> [u8; 32] {
         let mut hasher = blake3::Hasher::new();
 
-        hasher.update(&self.encode_to_vec()).finalize().as_bytes()[..].to_vec()
+        *hasher.update(&self.encode_to_vec()).finalize().as_bytes()
     }
 
     /// validate checks the directory for invalid data, such as:
diff --git a/tvix/store/src/proto/tests/directory.rs b/tvix/store/src/proto/tests/directory.rs
index 890cb2164ad5..03572629e606 100644
--- a/tvix/store/src/proto/tests/directory.rs
+++ b/tvix/store/src/proto/tests/directory.rs
@@ -2,7 +2,7 @@ use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDire
 use lazy_static::lazy_static;
 
 lazy_static! {
-    static ref DUMMY_DIGEST: Vec<u8> = vec![
+    static ref DUMMY_DIGEST: [u8; 32] = [
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
         0x00, 0x00,
@@ -66,7 +66,7 @@ fn digest() {
 
     assert_eq!(
         d.digest(),
-        vec![
+        [
             0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
             0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
             0xe4, 0x1f, 0x32, 0x62
diff --git a/tvix/store/src/proto/tests/grpc_directoryservice.rs b/tvix/store/src/proto/tests/grpc_directoryservice.rs
index 464c25f33d75..37428fbfa339 100644
--- a/tvix/store/src/proto/tests/grpc_directoryservice.rs
+++ b/tvix/store/src/proto/tests/grpc_directoryservice.rs
@@ -44,7 +44,7 @@ async fn not_found() {
 
     let resp = service
         .get(tonic::Request::new(GetDirectoryRequest {
-            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest())),
+            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().to_vec())),
             ..Default::default()
         }))
         .await;
@@ -114,14 +114,17 @@ async fn put_get_multiple() {
         .await
         .expect("must succeed");
 
-    assert_eq!(DIRECTORY_B.digest(), put_resp.into_inner().root_digest);
+    assert_eq!(
+        DIRECTORY_B.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
 
     // now, request b, first in non-recursive mode.
     let items = get_directories(
         &service,
         GetDirectoryRequest {
             recursive: false,
-            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest())),
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().to_vec())),
         },
     )
     .await
@@ -135,7 +138,7 @@ async fn put_get_multiple() {
         &service,
         GetDirectoryRequest {
             recursive: true,
-            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest())),
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().to_vec())),
         },
     )
     .await
@@ -161,14 +164,17 @@ async fn put_get_dedup() {
         .await
         .expect("must succeed");
 
-    assert_eq!(DIRECTORY_C.digest(), put_resp.into_inner().root_digest);
+    assert_eq!(
+        DIRECTORY_C.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
 
     // Ask for "C" recursively. We expect to only get "A" once, as there's no point sending it twice.
     let items = get_directories(
         &service,
         GetDirectoryRequest {
             recursive: true,
-            by_what: Some(ByWhat::Digest(DIRECTORY_C.digest())),
+            by_what: Some(ByWhat::Digest(DIRECTORY_C.digest().to_vec())),
         },
     )
     .await
@@ -211,7 +217,7 @@ async fn put_reject_wrong_size() {
     let broken_parent_directory = Directory {
         directories: vec![DirectoryNode {
             name: "foo".to_string(),
-            digest: DIRECTORY_A.digest(),
+            digest: DIRECTORY_A.digest().to_vec(),
             size: 42,
         }],
         ..Default::default()