From 32f41458c0a0f62bf906021ef096c465ccc45581 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Thu, 21 Sep 2023 22:32:44 +0300 Subject: refactor(tvix): move castore into tvix-castore crate This splits the pure content-addressed layers from tvix-store into a `castore` crate, and only leaves PathInfo related things, as well as the CLI entrypoint in the tvix-store crate. Notable changes: - `fixtures` and `utils` had to be moved out of the `test` cfg, so they can be imported from tvix-store. - Some ad-hoc fixtures in the test were moved to proper fixtures in the same step. - The protos are now created by a (more static) recipe in the protos/ directory. The (now two) golang targets are commented out, as it's not possible to update them properly in the same CL. This will be done by a followup CL once this is merged (and whitby deployed) Bug: https://b.tvl.fyi/issues/301 Change-Id: I8d675d4bf1fb697eb7d479747c1b1e3635718107 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9370 Reviewed-by: tazjin Reviewed-by: flokli Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: Connor Brewster --- tvix/castore/Cargo.toml | 42 ++ tvix/castore/build.rs | 38 ++ tvix/castore/default.nix | 5 + tvix/castore/protos/LICENSE | 21 + tvix/castore/protos/castore.go | 164 ++++++ tvix/castore/protos/castore.pb.go | 580 +++++++++++++++++++++ tvix/castore/protos/castore.proto | 72 +++ tvix/castore/protos/castore_test.go | 271 ++++++++++ tvix/castore/protos/go.mod | 19 + tvix/castore/protos/go.sum | 96 ++++ tvix/castore/protos/rpc_blobstore.pb.go | 414 +++++++++++++++ tvix/castore/protos/rpc_blobstore.proto | 52 ++ tvix/castore/protos/rpc_blobstore_grpc.pb.go | 274 ++++++++++ tvix/castore/protos/rpc_directory.pb.go | 273 ++++++++++ tvix/castore/protos/rpc_directory.proto | 48 ++ tvix/castore/protos/rpc_directory_grpc.pb.go | 238 +++++++++ tvix/castore/src/blobservice/from_addr.rs | 30 ++ tvix/castore/src/blobservice/grpc.rs | 426 +++++++++++++++ tvix/castore/src/blobservice/memory.rs | 196 +++++++ tvix/castore/src/blobservice/mod.rs | 62 +++ tvix/castore/src/blobservice/naive_seeker.rs | 269 ++++++++++ tvix/castore/src/blobservice/sled.rs | 249 +++++++++ tvix/castore/src/blobservice/tests.rs | 246 +++++++++ tvix/castore/src/digests.rs | 72 +++ tvix/castore/src/directoryservice/from_addr.rs | 36 ++ tvix/castore/src/directoryservice/grpc.rs | 541 +++++++++++++++++++ tvix/castore/src/directoryservice/memory.rs | 149 ++++++ tvix/castore/src/directoryservice/mod.rs | 76 +++ tvix/castore/src/directoryservice/sled.rs | 213 ++++++++ tvix/castore/src/directoryservice/traverse.rs | 228 ++++++++ tvix/castore/src/directoryservice/utils.rs | 140 +++++ tvix/castore/src/errors.rs | 45 ++ tvix/castore/src/fixtures.rs | 95 ++++ tvix/castore/src/import.rs | 205 ++++++++ tvix/castore/src/lib.rs | 15 + tvix/castore/src/proto/grpc_blobservice_wrapper.rs | 177 +++++++ .../src/proto/grpc_directoryservice_wrapper.rs | 184 +++++++ tvix/castore/src/proto/mod.rs | 279 ++++++++++ tvix/castore/src/proto/tests/directory.rs | 287 ++++++++++ .../src/proto/tests/directory_nodes_iterator.rs | 78 +++ tvix/castore/src/proto/tests/grpc_blobservice.rs | 100 ++++ .../src/proto/tests/grpc_directoryservice.rs | 239 +++++++++ tvix/castore/src/proto/tests/mod.rs | 4 + tvix/castore/src/tests/import.rs | 124 +++++ tvix/castore/src/tests/mod.rs | 1 + tvix/castore/src/utils.rs | 19 + 46 files changed, 7392 insertions(+) create mode 100644 tvix/castore/Cargo.toml create mode 100644 tvix/castore/build.rs create mode 100644 tvix/castore/default.nix create mode 100644 tvix/castore/protos/LICENSE create mode 100644 tvix/castore/protos/castore.go create mode 100644 tvix/castore/protos/castore.pb.go create mode 100644 tvix/castore/protos/castore.proto create mode 100644 tvix/castore/protos/castore_test.go create mode 100644 tvix/castore/protos/go.mod create mode 100644 tvix/castore/protos/go.sum create mode 100644 tvix/castore/protos/rpc_blobstore.pb.go create mode 100644 tvix/castore/protos/rpc_blobstore.proto create mode 100644 tvix/castore/protos/rpc_blobstore_grpc.pb.go create mode 100644 tvix/castore/protos/rpc_directory.pb.go create mode 100644 tvix/castore/protos/rpc_directory.proto create mode 100644 tvix/castore/protos/rpc_directory_grpc.pb.go create mode 100644 tvix/castore/src/blobservice/from_addr.rs create mode 100644 tvix/castore/src/blobservice/grpc.rs create mode 100644 tvix/castore/src/blobservice/memory.rs create mode 100644 tvix/castore/src/blobservice/mod.rs create mode 100644 tvix/castore/src/blobservice/naive_seeker.rs create mode 100644 tvix/castore/src/blobservice/sled.rs create mode 100644 tvix/castore/src/blobservice/tests.rs create mode 100644 tvix/castore/src/digests.rs create mode 100644 tvix/castore/src/directoryservice/from_addr.rs create mode 100644 tvix/castore/src/directoryservice/grpc.rs create mode 100644 tvix/castore/src/directoryservice/memory.rs create mode 100644 tvix/castore/src/directoryservice/mod.rs create mode 100644 tvix/castore/src/directoryservice/sled.rs create mode 100644 tvix/castore/src/directoryservice/traverse.rs create mode 100644 tvix/castore/src/directoryservice/utils.rs create mode 100644 tvix/castore/src/errors.rs create mode 100644 tvix/castore/src/fixtures.rs create mode 100644 tvix/castore/src/import.rs create mode 100644 tvix/castore/src/lib.rs create mode 100644 tvix/castore/src/proto/grpc_blobservice_wrapper.rs create mode 100644 tvix/castore/src/proto/grpc_directoryservice_wrapper.rs create mode 100644 tvix/castore/src/proto/mod.rs create mode 100644 tvix/castore/src/proto/tests/directory.rs create mode 100644 tvix/castore/src/proto/tests/directory_nodes_iterator.rs create mode 100644 tvix/castore/src/proto/tests/grpc_blobservice.rs create mode 100644 tvix/castore/src/proto/tests/grpc_directoryservice.rs create mode 100644 tvix/castore/src/proto/tests/mod.rs create mode 100644 tvix/castore/src/tests/import.rs create mode 100644 tvix/castore/src/tests/mod.rs create mode 100644 tvix/castore/src/utils.rs (limited to 'tvix/castore') diff --git a/tvix/castore/Cargo.toml b/tvix/castore/Cargo.toml new file mode 100644 index 000000000000..8bef5bb147bc --- /dev/null +++ b/tvix/castore/Cargo.toml @@ -0,0 +1,42 @@ +[package] +name = "tvix-castore" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.68" +async-stream = "0.3.5" +blake3 = { version = "1.3.1", features = ["rayon", "std"] } +bytes = "1.4.0" +data-encoding = "2.3.3" +futures = "0.3.28" +lazy_static = "1.4.0" +pin-project-lite = "0.2.13" +prost = "0.11.2" +sled = { version = "0.34.7", features = ["compression"] } +thiserror = "1.0.38" +tokio-stream = { version = "0.1.14", features = ["fs", "net"] } +tokio-util = { version = "0.7.8", features = ["io", "io-util"] } +tokio = { version = "1.28.0", features = ["fs", "net", "rt-multi-thread", "signal"] } +tonic = "0.8.2" +tower = "0.4.13" +tracing = "0.1.37" +url = "2.4.0" +walkdir = "2.4.0" + +[dependencies.tonic-reflection] +optional = true +version = "0.5.0" + +[build-dependencies] +prost-build = "0.11.2" +tonic-build = "0.8.2" + +[dev-dependencies] +test-case = "2.2.2" +tempfile = "3.3.0" +tonic-mock = { git = "https://github.com/brainrake/tonic-mock", branch = "bump-dependencies" } + +[features] +default = ["reflection"] +reflection = ["tonic-reflection"] \ No newline at end of file diff --git a/tvix/castore/build.rs b/tvix/castore/build.rs new file mode 100644 index 000000000000..9f06c8833319 --- /dev/null +++ b/tvix/castore/build.rs @@ -0,0 +1,38 @@ +use std::io::Result; + +fn main() -> Result<()> { + #[allow(unused_mut)] + let mut builder = tonic_build::configure(); + + #[cfg(feature = "reflection")] + { + let out_dir = std::path::PathBuf::from(std::env::var("OUT_DIR").unwrap()); + let descriptor_path = out_dir.join("tvix.castore.v1.bin"); + + builder = builder.file_descriptor_set_path(descriptor_path); + }; + + // https://github.com/hyperium/tonic/issues/908 + let mut config = prost_build::Config::new(); + config.bytes(["."]); + + builder + .build_server(true) + .build_client(true) + .compile_with_config( + config, + &[ + "tvix/castore/protos/castore.proto", + "tvix/castore/protos/rpc_blobstore.proto", + "tvix/castore/protos/rpc_directory.proto", + ], + // If we are in running `cargo build` manually, using `../..` works fine, + // but in case we run inside a nix build, we need to instead point PROTO_ROOT + // to a sparseTree containing that structure. + &[match std::env::var_os("PROTO_ROOT") { + Some(proto_root) => proto_root.to_str().unwrap().to_owned(), + None => "../..".to_string(), + }], + )?; + Ok(()) +} diff --git a/tvix/castore/default.nix b/tvix/castore/default.nix new file mode 100644 index 000000000000..09f6c0bea343 --- /dev/null +++ b/tvix/castore/default.nix @@ -0,0 +1,5 @@ +{ depot, pkgs, ... }: + +depot.tvix.crates.workspaceMembers.tvix-castore.build.override { + runTests = true; +} diff --git a/tvix/castore/protos/LICENSE b/tvix/castore/protos/LICENSE new file mode 100644 index 000000000000..2034ada6fd9a --- /dev/null +++ b/tvix/castore/protos/LICENSE @@ -0,0 +1,21 @@ +Copyright © The Tvix Authors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +“Software”), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/tvix/castore/protos/castore.go b/tvix/castore/protos/castore.go new file mode 100644 index 000000000000..102ba4bff75d --- /dev/null +++ b/tvix/castore/protos/castore.go @@ -0,0 +1,164 @@ +package castorev1 + +import ( + "bytes" + "encoding/base64" + "fmt" + "google.golang.org/protobuf/proto" + "lukechampine.com/blake3" +) + +// The size of a directory is calculated by summing up the numbers of +// `directories`, `files` and `symlinks`, and for each directory, its size +// field. +func (d *Directory) Size() uint32 { + var size uint32 + size = uint32(len(d.Files) + len(d.Symlinks)) + for _, d := range d.Directories { + size += 1 + d.Size + } + return size +} + +func (d *Directory) Digest() ([]byte, error) { + b, err := proto.MarshalOptions{ + Deterministic: true, + }.Marshal(d) + + if err != nil { + return nil, fmt.Errorf("error while marshalling directory: %w", err) + } + + h := blake3.New(32, nil) + + _, err = h.Write(b) + if err != nil { + return nil, fmt.Errorf("error writing to hasher: %w", err) + } + + return h.Sum(nil), nil +} + +// isValidName checks a name for validity. +// We disallow slashes, null bytes, '.', '..' and the empty string. +// Depending on the context, a *Node message with an empty string as name is +// allowed, but they don't occur inside a Directory message. +func isValidName(n []byte) bool { + if len(n) == 0 || bytes.Equal(n, []byte("..")) || bytes.Equal(n, []byte{'.'}) || bytes.Contains(n, []byte{'\x00'}) || bytes.Contains(n, []byte{'/'}) { + return false + } + return true +} + +// Validate thecks the Directory message for invalid data, such as: +// - violations of name restrictions +// - invalid digest lengths +// - not properly sorted lists +// - duplicate names in the three lists +func (d *Directory) Validate() error { + // seenNames contains all seen names so far. + // We populate this to ensure node names are unique across all three lists. + seenNames := make(map[string]interface{}) + + // We also track the last seen name in each of the three lists, + // to ensure nodes are sorted by their names. + var lastDirectoryName, lastFileName, lastSymlinkName []byte + + // helper function to only insert in sorted order. + // used with the three lists above. + // Note this consumes a *pointer to* a string, as it mutates it. + insertIfGt := func(lastName *[]byte, name []byte) error { + // update if it's greater than the previous name + if bytes.Compare(name, *lastName) == 1 { + *lastName = name + return nil + } else { + return fmt.Errorf("%v is not in sorted order", name) + } + } + + // insertOnce inserts into seenNames if the key doesn't exist yet. + insertOnce := func(name []byte) error { + encoded := base64.StdEncoding.EncodeToString(name) + if _, found := seenNames[encoded]; found { + return fmt.Errorf("duplicate name: %v", string(name)) + } + seenNames[encoded] = nil + return nil + } + + // Loop over all Directories, Files and Symlinks individually. + // Check the name for validity, check a potential digest for length, + // then check for sorting in the current list, and uniqueness across all three lists. + for _, directoryNode := range d.Directories { + directoryName := directoryNode.GetName() + + // check name for validity + if !isValidName(directoryName) { + return fmt.Errorf("invalid name for DirectoryNode: %v", directoryName) + } + + // check digest to be 32 bytes + digestLen := len(directoryNode.GetDigest()) + if digestLen != 32 { + return fmt.Errorf("invalid digest length for DirectoryNode: %d", digestLen) + } + + // ensure names are sorted + if err := insertIfGt(&lastDirectoryName, directoryName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(directoryName); err != nil { + return err + } + + } + + for _, fileNode := range d.Files { + fileName := fileNode.GetName() + + // check name for validity + if !isValidName(fileName) { + return fmt.Errorf("invalid name for FileNode: %v", fileName) + } + + // check digest to be 32 bytes + digestLen := len(fileNode.GetDigest()) + if digestLen != 32 { + return fmt.Errorf("invalid digest length for FileNode: %d", digestLen) + } + + // ensure names are sorted + if err := insertIfGt(&lastFileName, fileName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(fileName); err != nil { + return err + } + } + + for _, symlinkNode := range d.Symlinks { + symlinkName := symlinkNode.GetName() + + // check name for validity + if !isValidName(symlinkName) { + return fmt.Errorf("invalid name for SymlinkNode: %v", symlinkName) + } + + // ensure names are sorted + if err := insertIfGt(&lastSymlinkName, symlinkName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(symlinkName); err != nil { + return err + } + } + + return nil +} diff --git a/tvix/castore/protos/castore.pb.go b/tvix/castore/protos/castore.pb.go new file mode 100644 index 000000000000..5323d6c923a8 --- /dev/null +++ b/tvix/castore/protos/castore.pb.go @@ -0,0 +1,580 @@ +// SPDX-FileCopyrightText: edef +// SPDX-License-Identifier: OSL-3.0 OR MIT OR Apache-2.0 + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc (unknown) +// source: tvix/castore/protos/castore.proto + +package castorev1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// A Directory can contain Directory, File or Symlink nodes. +// Each of these nodes have a name attribute, which is the basename in that directory +// and node type specific attributes. +// The name attribute: +// - MUST not contain slashes or null bytes +// - MUST not be '.' or '..' +// - MUST be unique across all three lists +// +// Elements in each list need to be lexicographically ordered by the name +// attribute. +type Directory struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Directories []*DirectoryNode `protobuf:"bytes,1,rep,name=directories,proto3" json:"directories,omitempty"` + Files []*FileNode `protobuf:"bytes,2,rep,name=files,proto3" json:"files,omitempty"` + Symlinks []*SymlinkNode `protobuf:"bytes,3,rep,name=symlinks,proto3" json:"symlinks,omitempty"` +} + +func (x *Directory) Reset() { + *x = Directory{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Directory) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Directory) ProtoMessage() {} + +func (x *Directory) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Directory.ProtoReflect.Descriptor instead. +func (*Directory) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_castore_proto_rawDescGZIP(), []int{0} +} + +func (x *Directory) GetDirectories() []*DirectoryNode { + if x != nil { + return x.Directories + } + return nil +} + +func (x *Directory) GetFiles() []*FileNode { + if x != nil { + return x.Files + } + return nil +} + +func (x *Directory) GetSymlinks() []*SymlinkNode { + if x != nil { + return x.Symlinks + } + return nil +} + +// A DirectoryNode represents a directory in a Directory. +type DirectoryNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The (base)name of the directory + Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // The blake3 hash of a Directory message, serialized in protobuf canonical form. + Digest []byte `protobuf:"bytes,2,opt,name=digest,proto3" json:"digest,omitempty"` + // Number of child elements in the Directory referred to by `digest`. + // Calculated by summing up the numbers of `directories`, `files` and + // `symlinks`, and for each directory, its size field. Used for inode + // number calculation. + // This field is precisely as verifiable as any other Merkle tree edge. + // Resolve `digest`, and you can compute it incrementally. Resolve the + // entire tree, and you can fully compute it from scratch. + // A credulous implementation won't reject an excessive size, but this is + // harmless: you'll have some ordinals without nodes. Undersizing is + // obvious and easy to reject: you won't have an ordinal for some nodes. + Size uint32 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"` +} + +func (x *DirectoryNode) Reset() { + *x = DirectoryNode{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DirectoryNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DirectoryNode) ProtoMessage() {} + +func (x *DirectoryNode) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DirectoryNode.ProtoReflect.Descriptor instead. +func (*DirectoryNode) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_castore_proto_rawDescGZIP(), []int{1} +} + +func (x *DirectoryNode) GetName() []byte { + if x != nil { + return x.Name + } + return nil +} + +func (x *DirectoryNode) GetDigest() []byte { + if x != nil { + return x.Digest + } + return nil +} + +func (x *DirectoryNode) GetSize() uint32 { + if x != nil { + return x.Size + } + return 0 +} + +// A FileNode represents a regular or executable file in a Directory. +type FileNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The (base)name of the file + Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // The blake3 digest of the file contents + Digest []byte `protobuf:"bytes,2,opt,name=digest,proto3" json:"digest,omitempty"` + // The file content size + Size uint32 `protobuf:"varint,3,opt,name=size,proto3" json:"size,omitempty"` + // Whether the file is executable + Executable bool `protobuf:"varint,4,opt,name=executable,proto3" json:"executable,omitempty"` +} + +func (x *FileNode) Reset() { + *x = FileNode{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FileNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FileNode) ProtoMessage() {} + +func (x *FileNode) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FileNode.ProtoReflect.Descriptor instead. +func (*FileNode) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_castore_proto_rawDescGZIP(), []int{2} +} + +func (x *FileNode) GetName() []byte { + if x != nil { + return x.Name + } + return nil +} + +func (x *FileNode) GetDigest() []byte { + if x != nil { + return x.Digest + } + return nil +} + +func (x *FileNode) GetSize() uint32 { + if x != nil { + return x.Size + } + return 0 +} + +func (x *FileNode) GetExecutable() bool { + if x != nil { + return x.Executable + } + return false +} + +// A SymlinkNode represents a symbolic link in a Directory. +type SymlinkNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The (base)name of the symlink + Name []byte `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + // The target of the symlink. + Target []byte `protobuf:"bytes,2,opt,name=target,proto3" json:"target,omitempty"` +} + +func (x *SymlinkNode) Reset() { + *x = SymlinkNode{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SymlinkNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SymlinkNode) ProtoMessage() {} + +func (x *SymlinkNode) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SymlinkNode.ProtoReflect.Descriptor instead. +func (*SymlinkNode) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_castore_proto_rawDescGZIP(), []int{3} +} + +func (x *SymlinkNode) GetName() []byte { + if x != nil { + return x.Name + } + return nil +} + +func (x *SymlinkNode) GetTarget() []byte { + if x != nil { + return x.Target + } + return nil +} + +// A Node is either a DirectoryNode, FileNode or SymlinkNode. +type Node struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to Node: + // + // *Node_Directory + // *Node_File + // *Node_Symlink + Node isNode_Node `protobuf_oneof:"node"` +} + +func (x *Node) Reset() { + *x = Node{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Node) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Node) ProtoMessage() {} + +func (x *Node) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_castore_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Node.ProtoReflect.Descriptor instead. +func (*Node) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_castore_proto_rawDescGZIP(), []int{4} +} + +func (m *Node) GetNode() isNode_Node { + if m != nil { + return m.Node + } + return nil +} + +func (x *Node) GetDirectory() *DirectoryNode { + if x, ok := x.GetNode().(*Node_Directory); ok { + return x.Directory + } + return nil +} + +func (x *Node) GetFile() *FileNode { + if x, ok := x.GetNode().(*Node_File); ok { + return x.File + } + return nil +} + +func (x *Node) GetSymlink() *SymlinkNode { + if x, ok := x.GetNode().(*Node_Symlink); ok { + return x.Symlink + } + return nil +} + +type isNode_Node interface { + isNode_Node() +} + +type Node_Directory struct { + Directory *DirectoryNode `protobuf:"bytes,1,opt,name=directory,proto3,oneof"` +} + +type Node_File struct { + File *FileNode `protobuf:"bytes,2,opt,name=file,proto3,oneof"` +} + +type Node_Symlink struct { + Symlink *SymlinkNode `protobuf:"bytes,3,opt,name=symlink,proto3,oneof"` +} + +func (*Node_Directory) isNode_Node() {} + +func (*Node_File) isNode_Node() {} + +func (*Node_Symlink) isNode_Node() {} + +var File_tvix_castore_protos_castore_proto protoreflect.FileDescriptor + +var file_tvix_castore_protos_castore_proto_rawDesc = []byte{ + 0x0a, 0x21, 0x74, 0x76, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, + 0x65, 0x2e, 0x76, 0x31, 0x22, 0xb8, 0x01, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x79, 0x12, 0x40, 0x0a, 0x0b, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x69, 0x65, + 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, + 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x79, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x0b, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x69, 0x65, 0x73, 0x12, 0x2f, 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x02, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, + 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x05, + 0x66, 0x69, 0x6c, 0x65, 0x73, 0x12, 0x38, 0x0a, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, + 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, + 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, + 0x6b, 0x4e, 0x6f, 0x64, 0x65, 0x52, 0x08, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x73, 0x22, + 0x4f, 0x0a, 0x0d, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x4e, 0x6f, 0x64, 0x65, + 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, + 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, + 0x22, 0x6a, 0x0a, 0x08, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a, + 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x08, + 0x52, 0x0a, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, 0x61, 0x62, 0x6c, 0x65, 0x22, 0x39, 0x0a, 0x0b, + 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6e, + 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, + 0x16, 0x0a, 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, + 0x06, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x22, 0xb9, 0x01, 0x0a, 0x04, 0x4e, 0x6f, 0x64, 0x65, + 0x12, 0x3e, 0x0a, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, + 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x4e, + 0x6f, 0x64, 0x65, 0x48, 0x00, 0x52, 0x09, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, + 0x12, 0x2f, 0x0a, 0x04, 0x66, 0x69, 0x6c, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, + 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, + 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x4e, 0x6f, 0x64, 0x65, 0x48, 0x00, 0x52, 0x04, 0x66, 0x69, 0x6c, + 0x65, 0x12, 0x38, 0x0a, 0x07, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, + 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x4e, 0x6f, 0x64, 0x65, + 0x48, 0x00, 0x52, 0x07, 0x73, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x42, 0x06, 0x0a, 0x04, 0x6e, + 0x6f, 0x64, 0x65, 0x42, 0x2c, 0x5a, 0x2a, 0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e, + 0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, + 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, + 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_tvix_castore_protos_castore_proto_rawDescOnce sync.Once + file_tvix_castore_protos_castore_proto_rawDescData = file_tvix_castore_protos_castore_proto_rawDesc +) + +func file_tvix_castore_protos_castore_proto_rawDescGZIP() []byte { + file_tvix_castore_protos_castore_proto_rawDescOnce.Do(func() { + file_tvix_castore_protos_castore_proto_rawDescData = protoimpl.X.CompressGZIP(file_tvix_castore_protos_castore_proto_rawDescData) + }) + return file_tvix_castore_protos_castore_proto_rawDescData +} + +var file_tvix_castore_protos_castore_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_tvix_castore_protos_castore_proto_goTypes = []interface{}{ + (*Directory)(nil), // 0: tvix.castore.v1.Directory + (*DirectoryNode)(nil), // 1: tvix.castore.v1.DirectoryNode + (*FileNode)(nil), // 2: tvix.castore.v1.FileNode + (*SymlinkNode)(nil), // 3: tvix.castore.v1.SymlinkNode + (*Node)(nil), // 4: tvix.castore.v1.Node +} +var file_tvix_castore_protos_castore_proto_depIdxs = []int32{ + 1, // 0: tvix.castore.v1.Directory.directories:type_name -> tvix.castore.v1.DirectoryNode + 2, // 1: tvix.castore.v1.Directory.files:type_name -> tvix.castore.v1.FileNode + 3, // 2: tvix.castore.v1.Directory.symlinks:type_name -> tvix.castore.v1.SymlinkNode + 1, // 3: tvix.castore.v1.Node.directory:type_name -> tvix.castore.v1.DirectoryNode + 2, // 4: tvix.castore.v1.Node.file:type_name -> tvix.castore.v1.FileNode + 3, // 5: tvix.castore.v1.Node.symlink:type_name -> tvix.castore.v1.SymlinkNode + 6, // [6:6] is the sub-list for method output_type + 6, // [6:6] is the sub-list for method input_type + 6, // [6:6] is the sub-list for extension type_name + 6, // [6:6] is the sub-list for extension extendee + 0, // [0:6] is the sub-list for field type_name +} + +func init() { file_tvix_castore_protos_castore_proto_init() } +func file_tvix_castore_protos_castore_proto_init() { + if File_tvix_castore_protos_castore_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_tvix_castore_protos_castore_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Directory); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_castore_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DirectoryNode); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_castore_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FileNode); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_castore_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SymlinkNode); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_castore_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Node); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_tvix_castore_protos_castore_proto_msgTypes[4].OneofWrappers = []interface{}{ + (*Node_Directory)(nil), + (*Node_File)(nil), + (*Node_Symlink)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_tvix_castore_protos_castore_proto_rawDesc, + NumEnums: 0, + NumMessages: 5, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_tvix_castore_protos_castore_proto_goTypes, + DependencyIndexes: file_tvix_castore_protos_castore_proto_depIdxs, + MessageInfos: file_tvix_castore_protos_castore_proto_msgTypes, + }.Build() + File_tvix_castore_protos_castore_proto = out.File + file_tvix_castore_protos_castore_proto_rawDesc = nil + file_tvix_castore_protos_castore_proto_goTypes = nil + file_tvix_castore_protos_castore_proto_depIdxs = nil +} diff --git a/tvix/castore/protos/castore.proto b/tvix/castore/protos/castore.proto new file mode 100644 index 000000000000..d99df43857ac --- /dev/null +++ b/tvix/castore/protos/castore.proto @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: edef +// SPDX-License-Identifier: OSL-3.0 OR MIT OR Apache-2.0 + +syntax = "proto3"; + +package tvix.castore.v1; + +option go_package = "code.tvl.fyi/tvix/castore/protos;castorev1"; + +// A Directory can contain Directory, File or Symlink nodes. +// Each of these nodes have a name attribute, which is the basename in that directory +// and node type specific attributes. +// The name attribute: +// - MUST not contain slashes or null bytes +// - MUST not be '.' or '..' +// - MUST be unique across all three lists +// Elements in each list need to be lexicographically ordered by the name +// attribute. +message Directory { + repeated DirectoryNode directories = 1; + repeated FileNode files = 2; + repeated SymlinkNode symlinks = 3; +} + +// A DirectoryNode represents a directory in a Directory. +message DirectoryNode { + // The (base)name of the directory + bytes name = 1; + // The blake3 hash of a Directory message, serialized in protobuf canonical form. + bytes digest = 2; + // Number of child elements in the Directory referred to by `digest`. + // Calculated by summing up the numbers of `directories`, `files` and + // `symlinks`, and for each directory, its size field. Used for inode + // number calculation. + // This field is precisely as verifiable as any other Merkle tree edge. + // Resolve `digest`, and you can compute it incrementally. Resolve the + // entire tree, and you can fully compute it from scratch. + // A credulous implementation won't reject an excessive size, but this is + // harmless: you'll have some ordinals without nodes. Undersizing is + // obvious and easy to reject: you won't have an ordinal for some nodes. + uint32 size = 3; +} + +// A FileNode represents a regular or executable file in a Directory. +message FileNode { + // The (base)name of the file + bytes name = 1; + // The blake3 digest of the file contents + bytes digest = 2; + // The file content size + uint32 size = 3; + // Whether the file is executable + bool executable = 4; +} + +// A SymlinkNode represents a symbolic link in a Directory. +message SymlinkNode { + // The (base)name of the symlink + bytes name = 1; + // The target of the symlink. + bytes target = 2; +} + +// A Node is either a DirectoryNode, FileNode or SymlinkNode. +message Node { + oneof node { + DirectoryNode directory = 1; + FileNode file = 2; + SymlinkNode symlink = 3; + } +} + diff --git a/tvix/castore/protos/castore_test.go b/tvix/castore/protos/castore_test.go new file mode 100644 index 000000000000..958d399d76cc --- /dev/null +++ b/tvix/castore/protos/castore_test.go @@ -0,0 +1,271 @@ +package castorev1_test + +import ( + "testing" + + castorev1pb "code.tvl.fyi/tvix/castore/protos" + "github.com/stretchr/testify/assert" +) + +var ( + dummyDigest = []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + } +) + +func TestDirectorySize(t *testing.T) { + t.Run("empty", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.Equal(t, uint32(0), d.Size()) + }) + + t.Run("containing single empty directory", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte([]byte("foo")), + Digest: dummyDigest, + Size: 0, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.Equal(t, uint32(1), d.Size()) + }) + + t.Run("containing single non-empty directory", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("foo"), + Digest: dummyDigest, + Size: 4, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.Equal(t, uint32(5), d.Size()) + }) + + t.Run("containing single file", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{{ + Name: []byte("foo"), + Digest: dummyDigest, + Size: 42, + Executable: false, + }}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.Equal(t, uint32(1), d.Size()) + }) + + t.Run("containing single symlink", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{{ + Name: []byte("foo"), + Target: []byte("bar"), + }}, + } + + assert.Equal(t, uint32(1), d.Size()) + }) + +} +func TestDirectoryDigest(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + dgst, err := d.Digest() + assert.NoError(t, err, "calling Digest() on a directory shouldn't error") + assert.Equal(t, []byte{ + 0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc, + 0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca, + 0xe4, 0x1f, 0x32, 0x62, + }, dgst) +} + +func TestDirectoryValidate(t *testing.T) { + t.Run("empty", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.NoError(t, d.Validate()) + }) + + t.Run("invalid names", func(t *testing.T) { + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte{}, + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.ErrorContains(t, d.Validate(), "invalid name") + } + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("."), + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.ErrorContains(t, d.Validate(), "invalid name") + } + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{{ + Name: []byte(".."), + Digest: dummyDigest, + Size: 42, + Executable: false, + }}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.ErrorContains(t, d.Validate(), "invalid name") + } + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{{ + Name: []byte("\x00"), + Target: []byte("foo"), + }}, + } + + assert.ErrorContains(t, d.Validate(), "invalid name") + } + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{{ + Name: []byte("foo/bar"), + Target: []byte("foo"), + }}, + } + + assert.ErrorContains(t, d.Validate(), "invalid name") + } + }) + + t.Run("invalid digest", func(t *testing.T) { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("foo"), + Digest: nil, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + assert.ErrorContains(t, d.Validate(), "invalid digest length") + }) + + t.Run("sorting", func(t *testing.T) { + // "b" comes before "a", bad. + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("b"), + Digest: dummyDigest, + Size: 42, + }, { + Name: []byte("a"), + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + assert.ErrorContains(t, d.Validate(), "is not in sorted order") + } + + // "a" exists twice, bad. + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("a"), + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{{ + Name: []byte("a"), + Digest: dummyDigest, + Size: 42, + Executable: false, + }}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + assert.ErrorContains(t, d.Validate(), "duplicate name") + } + + // "a" comes before "b", all good. + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("a"), + Digest: dummyDigest, + Size: 42, + }, { + Name: []byte("b"), + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + assert.NoError(t, d.Validate(), "shouldn't error") + } + + // [b, c] and [a] are both properly sorted. + { + d := castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{{ + Name: []byte("b"), + Digest: dummyDigest, + Size: 42, + }, { + Name: []byte("c"), + Digest: dummyDigest, + Size: 42, + }}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{{ + Name: []byte("a"), + Target: []byte("foo"), + }}, + } + assert.NoError(t, d.Validate(), "shouldn't error") + } + }) +} diff --git a/tvix/castore/protos/go.mod b/tvix/castore/protos/go.mod new file mode 100644 index 000000000000..35219aba5c1b --- /dev/null +++ b/tvix/castore/protos/go.mod @@ -0,0 +1,19 @@ +module code.tvl.fyi/tvix/castore/protos + +go 1.19 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/stretchr/testify v1.8.1 // indirect + golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/text v0.4.0 // indirect + google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect + google.golang.org/grpc v1.51.0 // indirect + google.golang.org/protobuf v1.28.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + lukechampine.com/blake3 v1.1.7 // indirect +) diff --git a/tvix/castore/protos/go.sum b/tvix/castore/protos/go.sum new file mode 100644 index 000000000000..7a603cdb120d --- /dev/null +++ b/tvix/castore/protos/go.sum @@ -0,0 +1,96 @@ +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 h1:+kGHl1aib/qcwaRi1CbqBZ1rk19r85MNUf8HaBghugY= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.51.0 h1:E1eGv1FTqoLIdnBCZufiSHgKjlqG6fKFf6pPWtMTh8U= +google.golang.org/grpc v1.51.0/go.mod h1:wgNDFcnuBGmxLKI/qn4T+m5BtEBYXJPvibbUPsAIPww= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +lukechampine.com/blake3 v1.1.7 h1:GgRMhmdsuK8+ii6UZFDL8Nb+VyMwadAgcJyfYHxG6n0= +lukechampine.com/blake3 v1.1.7/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= diff --git a/tvix/castore/protos/rpc_blobstore.pb.go b/tvix/castore/protos/rpc_blobstore.pb.go new file mode 100644 index 000000000000..1afc82674451 --- /dev/null +++ b/tvix/castore/protos/rpc_blobstore.pb.go @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc (unknown) +// source: tvix/castore/protos/rpc_blobstore.proto + +package castorev1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type StatBlobRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The blake3 digest of the blob requested + Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"` +} + +func (x *StatBlobRequest) Reset() { + *x = StatBlobRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StatBlobRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StatBlobRequest) ProtoMessage() {} + +func (x *StatBlobRequest) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StatBlobRequest.ProtoReflect.Descriptor instead. +func (*StatBlobRequest) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{0} +} + +func (x *StatBlobRequest) GetDigest() []byte { + if x != nil { + return x.Digest + } + return nil +} + +type BlobMeta struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *BlobMeta) Reset() { + *x = BlobMeta{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *BlobMeta) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BlobMeta) ProtoMessage() {} + +func (x *BlobMeta) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BlobMeta.ProtoReflect.Descriptor instead. +func (*BlobMeta) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{1} +} + +type ReadBlobRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The blake3 digest of the blob or chunk requested + Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"` +} + +func (x *ReadBlobRequest) Reset() { + *x = ReadBlobRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ReadBlobRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ReadBlobRequest) ProtoMessage() {} + +func (x *ReadBlobRequest) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ReadBlobRequest.ProtoReflect.Descriptor instead. +func (*ReadBlobRequest) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{2} +} + +func (x *ReadBlobRequest) GetDigest() []byte { + if x != nil { + return x.Digest + } + return nil +} + +// This represents some bytes of a blob. +// Blobs are sent in smaller chunks to keep message sizes manageable. +type BlobChunk struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` +} + +func (x *BlobChunk) Reset() { + *x = BlobChunk{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *BlobChunk) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*BlobChunk) ProtoMessage() {} + +func (x *BlobChunk) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use BlobChunk.ProtoReflect.Descriptor instead. +func (*BlobChunk) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{3} +} + +func (x *BlobChunk) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +type PutBlobResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The blake3 digest of the data that was sent. + Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3" json:"digest,omitempty"` +} + +func (x *PutBlobResponse) Reset() { + *x = PutBlobResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutBlobResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutBlobResponse) ProtoMessage() {} + +func (x *PutBlobResponse) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutBlobResponse.ProtoReflect.Descriptor instead. +func (*PutBlobResponse) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP(), []int{4} +} + +func (x *PutBlobResponse) GetDigest() []byte { + if x != nil { + return x.Digest + } + return nil +} + +var File_tvix_castore_protos_rpc_blobstore_proto protoreflect.FileDescriptor + +var file_tvix_castore_protos_rpc_blobstore_proto_rawDesc = []byte{ + 0x0a, 0x27, 0x74, 0x76, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x72, 0x70, 0x63, 0x5f, 0x62, 0x6c, 0x6f, 0x62, 0x73, 0x74, + 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x74, 0x76, 0x69, 0x78, 0x2e, + 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x22, 0x29, 0x0a, 0x0f, 0x53, 0x74, + 0x61, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, + 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, + 0x69, 0x67, 0x65, 0x73, 0x74, 0x22, 0x0a, 0x0a, 0x08, 0x42, 0x6c, 0x6f, 0x62, 0x4d, 0x65, 0x74, + 0x61, 0x22, 0x29, 0x0a, 0x0f, 0x52, 0x65, 0x61, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x22, 0x1f, 0x0a, 0x09, + 0x42, 0x6c, 0x6f, 0x62, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, + 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x29, 0x0a, + 0x0f, 0x50, 0x75, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x32, 0xe1, 0x01, 0x0a, 0x0b, 0x42, 0x6c, 0x6f, + 0x62, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x43, 0x0a, 0x04, 0x53, 0x74, 0x61, 0x74, + 0x12, 0x20, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, + 0x76, 0x31, 0x2e, 0x53, 0x74, 0x61, 0x74, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, + 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x6c, 0x6f, 0x62, 0x4d, 0x65, 0x74, 0x61, 0x12, 0x46, 0x0a, + 0x04, 0x52, 0x65, 0x61, 0x64, 0x12, 0x20, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, + 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x52, 0x65, 0x61, 0x64, 0x42, 0x6c, 0x6f, 0x62, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, + 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42, 0x6c, 0x6f, 0x62, 0x43, 0x68, + 0x75, 0x6e, 0x6b, 0x30, 0x01, 0x12, 0x45, 0x0a, 0x03, 0x50, 0x75, 0x74, 0x12, 0x1a, 0x2e, 0x74, + 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x42, + 0x6c, 0x6f, 0x62, 0x43, 0x68, 0x75, 0x6e, 0x6b, 0x1a, 0x20, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, + 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x42, 0x6c, + 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x2c, 0x5a, 0x2a, + 0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e, 0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69, + 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, + 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_tvix_castore_protos_rpc_blobstore_proto_rawDescOnce sync.Once + file_tvix_castore_protos_rpc_blobstore_proto_rawDescData = file_tvix_castore_protos_rpc_blobstore_proto_rawDesc +) + +func file_tvix_castore_protos_rpc_blobstore_proto_rawDescGZIP() []byte { + file_tvix_castore_protos_rpc_blobstore_proto_rawDescOnce.Do(func() { + file_tvix_castore_protos_rpc_blobstore_proto_rawDescData = protoimpl.X.CompressGZIP(file_tvix_castore_protos_rpc_blobstore_proto_rawDescData) + }) + return file_tvix_castore_protos_rpc_blobstore_proto_rawDescData +} + +var file_tvix_castore_protos_rpc_blobstore_proto_msgTypes = make([]protoimpl.MessageInfo, 5) +var file_tvix_castore_protos_rpc_blobstore_proto_goTypes = []interface{}{ + (*StatBlobRequest)(nil), // 0: tvix.castore.v1.StatBlobRequest + (*BlobMeta)(nil), // 1: tvix.castore.v1.BlobMeta + (*ReadBlobRequest)(nil), // 2: tvix.castore.v1.ReadBlobRequest + (*BlobChunk)(nil), // 3: tvix.castore.v1.BlobChunk + (*PutBlobResponse)(nil), // 4: tvix.castore.v1.PutBlobResponse +} +var file_tvix_castore_protos_rpc_blobstore_proto_depIdxs = []int32{ + 0, // 0: tvix.castore.v1.BlobService.Stat:input_type -> tvix.castore.v1.StatBlobRequest + 2, // 1: tvix.castore.v1.BlobService.Read:input_type -> tvix.castore.v1.ReadBlobRequest + 3, // 2: tvix.castore.v1.BlobService.Put:input_type -> tvix.castore.v1.BlobChunk + 1, // 3: tvix.castore.v1.BlobService.Stat:output_type -> tvix.castore.v1.BlobMeta + 3, // 4: tvix.castore.v1.BlobService.Read:output_type -> tvix.castore.v1.BlobChunk + 4, // 5: tvix.castore.v1.BlobService.Put:output_type -> tvix.castore.v1.PutBlobResponse + 3, // [3:6] is the sub-list for method output_type + 0, // [0:3] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_tvix_castore_protos_rpc_blobstore_proto_init() } +func file_tvix_castore_protos_rpc_blobstore_proto_init() { + if File_tvix_castore_protos_rpc_blobstore_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StatBlobRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*BlobMeta); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ReadBlobRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*BlobChunk); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_rpc_blobstore_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutBlobResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_tvix_castore_protos_rpc_blobstore_proto_rawDesc, + NumEnums: 0, + NumMessages: 5, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_tvix_castore_protos_rpc_blobstore_proto_goTypes, + DependencyIndexes: file_tvix_castore_protos_rpc_blobstore_proto_depIdxs, + MessageInfos: file_tvix_castore_protos_rpc_blobstore_proto_msgTypes, + }.Build() + File_tvix_castore_protos_rpc_blobstore_proto = out.File + file_tvix_castore_protos_rpc_blobstore_proto_rawDesc = nil + file_tvix_castore_protos_rpc_blobstore_proto_goTypes = nil + file_tvix_castore_protos_rpc_blobstore_proto_depIdxs = nil +} diff --git a/tvix/castore/protos/rpc_blobstore.proto b/tvix/castore/protos/rpc_blobstore.proto new file mode 100644 index 000000000000..6ee9a80f0afc --- /dev/null +++ b/tvix/castore/protos/rpc_blobstore.proto @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors +syntax = "proto3"; + +package tvix.castore.v1; + +option go_package = "code.tvl.fyi/tvix/castore/protos;castorev1"; + +service BlobService { + // In the future, Stat will expose more metadata about a given blob, + // such as more granular chunking, baos. + // For now, it's only used to check for the existence of a blob, as asking + // this for a non-existing Blob will return a Status::not_found gRPC error. + rpc Stat(StatBlobRequest) returns (BlobMeta); + + // Read returns a stream of BlobChunk, which is just a stream of bytes with + // the digest specified in ReadBlobRequest. + // + // The server may decide on whatever chunking it may seem fit as a size for + // the individual BlobChunk sent in the response stream. + rpc Read(ReadBlobRequest) returns (stream BlobChunk); + + // Put uploads a Blob, by reading a stream of bytes. + // + // The way the data is chunked up in individual BlobChunk messages sent in + // the stream has no effect on how the server ends up chunking blobs up. + rpc Put(stream BlobChunk) returns (PutBlobResponse); +} + +message StatBlobRequest { + // The blake3 digest of the blob requested + bytes digest = 1; +} + +message BlobMeta { +} + +message ReadBlobRequest { + // The blake3 digest of the blob or chunk requested + bytes digest = 1; +} + +// This represents some bytes of a blob. +// Blobs are sent in smaller chunks to keep message sizes manageable. +message BlobChunk { + bytes data = 1; +} + +message PutBlobResponse { + // The blake3 digest of the data that was sent. + bytes digest = 1; +} diff --git a/tvix/castore/protos/rpc_blobstore_grpc.pb.go b/tvix/castore/protos/rpc_blobstore_grpc.pb.go new file mode 100644 index 000000000000..0876bcc4e95a --- /dev/null +++ b/tvix/castore/protos/rpc_blobstore_grpc.pb.go @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors + +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.3.0 +// - protoc (unknown) +// source: tvix/castore/protos/rpc_blobstore.proto + +package castorev1 + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +const ( + BlobService_Stat_FullMethodName = "/tvix.castore.v1.BlobService/Stat" + BlobService_Read_FullMethodName = "/tvix.castore.v1.BlobService/Read" + BlobService_Put_FullMethodName = "/tvix.castore.v1.BlobService/Put" +) + +// BlobServiceClient is the client API for BlobService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type BlobServiceClient interface { + // In the future, Stat will expose more metadata about a given blob, + // such as more granular chunking, baos. + // For now, it's only used to check for the existence of a blob, as asking + // this for a non-existing Blob will return a Status::not_found gRPC error. + Stat(ctx context.Context, in *StatBlobRequest, opts ...grpc.CallOption) (*BlobMeta, error) + // Read returns a stream of BlobChunk, which is just a stream of bytes with + // the digest specified in ReadBlobRequest. + // + // The server may decide on whatever chunking it may seem fit as a size for + // the individual BlobChunk sent in the response stream. + Read(ctx context.Context, in *ReadBlobRequest, opts ...grpc.CallOption) (BlobService_ReadClient, error) + // Put uploads a Blob, by reading a stream of bytes. + // + // The way the data is chunked up in individual BlobChunk messages sent in + // the stream has no effect on how the server ends up chunking blobs up. + Put(ctx context.Context, opts ...grpc.CallOption) (BlobService_PutClient, error) +} + +type blobServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewBlobServiceClient(cc grpc.ClientConnInterface) BlobServiceClient { + return &blobServiceClient{cc} +} + +func (c *blobServiceClient) Stat(ctx context.Context, in *StatBlobRequest, opts ...grpc.CallOption) (*BlobMeta, error) { + out := new(BlobMeta) + err := c.cc.Invoke(ctx, BlobService_Stat_FullMethodName, in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *blobServiceClient) Read(ctx context.Context, in *ReadBlobRequest, opts ...grpc.CallOption) (BlobService_ReadClient, error) { + stream, err := c.cc.NewStream(ctx, &BlobService_ServiceDesc.Streams[0], BlobService_Read_FullMethodName, opts...) + if err != nil { + return nil, err + } + x := &blobServiceReadClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type BlobService_ReadClient interface { + Recv() (*BlobChunk, error) + grpc.ClientStream +} + +type blobServiceReadClient struct { + grpc.ClientStream +} + +func (x *blobServiceReadClient) Recv() (*BlobChunk, error) { + m := new(BlobChunk) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *blobServiceClient) Put(ctx context.Context, opts ...grpc.CallOption) (BlobService_PutClient, error) { + stream, err := c.cc.NewStream(ctx, &BlobService_ServiceDesc.Streams[1], BlobService_Put_FullMethodName, opts...) + if err != nil { + return nil, err + } + x := &blobServicePutClient{stream} + return x, nil +} + +type BlobService_PutClient interface { + Send(*BlobChunk) error + CloseAndRecv() (*PutBlobResponse, error) + grpc.ClientStream +} + +type blobServicePutClient struct { + grpc.ClientStream +} + +func (x *blobServicePutClient) Send(m *BlobChunk) error { + return x.ClientStream.SendMsg(m) +} + +func (x *blobServicePutClient) CloseAndRecv() (*PutBlobResponse, error) { + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + m := new(PutBlobResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// BlobServiceServer is the server API for BlobService service. +// All implementations must embed UnimplementedBlobServiceServer +// for forward compatibility +type BlobServiceServer interface { + // In the future, Stat will expose more metadata about a given blob, + // such as more granular chunking, baos. + // For now, it's only used to check for the existence of a blob, as asking + // this for a non-existing Blob will return a Status::not_found gRPC error. + Stat(context.Context, *StatBlobRequest) (*BlobMeta, error) + // Read returns a stream of BlobChunk, which is just a stream of bytes with + // the digest specified in ReadBlobRequest. + // + // The server may decide on whatever chunking it may seem fit as a size for + // the individual BlobChunk sent in the response stream. + Read(*ReadBlobRequest, BlobService_ReadServer) error + // Put uploads a Blob, by reading a stream of bytes. + // + // The way the data is chunked up in individual BlobChunk messages sent in + // the stream has no effect on how the server ends up chunking blobs up. + Put(BlobService_PutServer) error + mustEmbedUnimplementedBlobServiceServer() +} + +// UnimplementedBlobServiceServer must be embedded to have forward compatible implementations. +type UnimplementedBlobServiceServer struct { +} + +func (UnimplementedBlobServiceServer) Stat(context.Context, *StatBlobRequest) (*BlobMeta, error) { + return nil, status.Errorf(codes.Unimplemented, "method Stat not implemented") +} +func (UnimplementedBlobServiceServer) Read(*ReadBlobRequest, BlobService_ReadServer) error { + return status.Errorf(codes.Unimplemented, "method Read not implemented") +} +func (UnimplementedBlobServiceServer) Put(BlobService_PutServer) error { + return status.Errorf(codes.Unimplemented, "method Put not implemented") +} +func (UnimplementedBlobServiceServer) mustEmbedUnimplementedBlobServiceServer() {} + +// UnsafeBlobServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to BlobServiceServer will +// result in compilation errors. +type UnsafeBlobServiceServer interface { + mustEmbedUnimplementedBlobServiceServer() +} + +func RegisterBlobServiceServer(s grpc.ServiceRegistrar, srv BlobServiceServer) { + s.RegisterService(&BlobService_ServiceDesc, srv) +} + +func _BlobService_Stat_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(StatBlobRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(BlobServiceServer).Stat(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: BlobService_Stat_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(BlobServiceServer).Stat(ctx, req.(*StatBlobRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _BlobService_Read_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(ReadBlobRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(BlobServiceServer).Read(m, &blobServiceReadServer{stream}) +} + +type BlobService_ReadServer interface { + Send(*BlobChunk) error + grpc.ServerStream +} + +type blobServiceReadServer struct { + grpc.ServerStream +} + +func (x *blobServiceReadServer) Send(m *BlobChunk) error { + return x.ServerStream.SendMsg(m) +} + +func _BlobService_Put_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(BlobServiceServer).Put(&blobServicePutServer{stream}) +} + +type BlobService_PutServer interface { + SendAndClose(*PutBlobResponse) error + Recv() (*BlobChunk, error) + grpc.ServerStream +} + +type blobServicePutServer struct { + grpc.ServerStream +} + +func (x *blobServicePutServer) SendAndClose(m *PutBlobResponse) error { + return x.ServerStream.SendMsg(m) +} + +func (x *blobServicePutServer) Recv() (*BlobChunk, error) { + m := new(BlobChunk) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// BlobService_ServiceDesc is the grpc.ServiceDesc for BlobService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var BlobService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "tvix.castore.v1.BlobService", + HandlerType: (*BlobServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Stat", + Handler: _BlobService_Stat_Handler, + }, + }, + Streams: []grpc.StreamDesc{ + { + StreamName: "Read", + Handler: _BlobService_Read_Handler, + ServerStreams: true, + }, + { + StreamName: "Put", + Handler: _BlobService_Put_Handler, + ClientStreams: true, + }, + }, + Metadata: "tvix/castore/protos/rpc_blobstore.proto", +} diff --git a/tvix/castore/protos/rpc_directory.pb.go b/tvix/castore/protos/rpc_directory.pb.go new file mode 100644 index 000000000000..f658c6b60cc0 --- /dev/null +++ b/tvix/castore/protos/rpc_directory.pb.go @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc (unknown) +// source: tvix/castore/protos/rpc_directory.proto + +package castorev1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type GetDirectoryRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to ByWhat: + // + // *GetDirectoryRequest_Digest + ByWhat isGetDirectoryRequest_ByWhat `protobuf_oneof:"by_what"` + // If set to true, recursively resolve all child Directory messages. + // Directory messages SHOULD be streamed in a recursive breadth-first walk, + // but other orders are also fine, as long as Directory messages are only + // sent after they are referred to from previously sent Directory messages. + Recursive bool `protobuf:"varint,2,opt,name=recursive,proto3" json:"recursive,omitempty"` +} + +func (x *GetDirectoryRequest) Reset() { + *x = GetDirectoryRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_directory_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetDirectoryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetDirectoryRequest) ProtoMessage() {} + +func (x *GetDirectoryRequest) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_directory_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetDirectoryRequest.ProtoReflect.Descriptor instead. +func (*GetDirectoryRequest) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_directory_proto_rawDescGZIP(), []int{0} +} + +func (m *GetDirectoryRequest) GetByWhat() isGetDirectoryRequest_ByWhat { + if m != nil { + return m.ByWhat + } + return nil +} + +func (x *GetDirectoryRequest) GetDigest() []byte { + if x, ok := x.GetByWhat().(*GetDirectoryRequest_Digest); ok { + return x.Digest + } + return nil +} + +func (x *GetDirectoryRequest) GetRecursive() bool { + if x != nil { + return x.Recursive + } + return false +} + +type isGetDirectoryRequest_ByWhat interface { + isGetDirectoryRequest_ByWhat() +} + +type GetDirectoryRequest_Digest struct { + // The blake3 hash of the (root) Directory message, serialized in + // protobuf canonical form. + // Keep in mind this can be a subtree of another root. + Digest []byte `protobuf:"bytes,1,opt,name=digest,proto3,oneof"` +} + +func (*GetDirectoryRequest_Digest) isGetDirectoryRequest_ByWhat() {} + +type PutDirectoryResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + RootDigest []byte `protobuf:"bytes,1,opt,name=root_digest,json=rootDigest,proto3" json:"root_digest,omitempty"` +} + +func (x *PutDirectoryResponse) Reset() { + *x = PutDirectoryResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_tvix_castore_protos_rpc_directory_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PutDirectoryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PutDirectoryResponse) ProtoMessage() {} + +func (x *PutDirectoryResponse) ProtoReflect() protoreflect.Message { + mi := &file_tvix_castore_protos_rpc_directory_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PutDirectoryResponse.ProtoReflect.Descriptor instead. +func (*PutDirectoryResponse) Descriptor() ([]byte, []int) { + return file_tvix_castore_protos_rpc_directory_proto_rawDescGZIP(), []int{1} +} + +func (x *PutDirectoryResponse) GetRootDigest() []byte { + if x != nil { + return x.RootDigest + } + return nil +} + +var File_tvix_castore_protos_rpc_directory_proto protoreflect.FileDescriptor + +var file_tvix_castore_protos_rpc_directory_proto_rawDesc = []byte{ + 0x0a, 0x27, 0x74, 0x76, 0x69, 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, 0x72, 0x70, 0x63, 0x5f, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0f, 0x74, 0x76, 0x69, 0x78, 0x2e, + 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x1a, 0x21, 0x74, 0x76, 0x69, 0x78, + 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, 0x2f, + 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x58, 0x0a, + 0x13, 0x47, 0x65, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x71, + 0x75, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0c, 0x48, 0x00, 0x52, 0x06, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x12, 0x1c, + 0x0a, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x08, 0x52, 0x09, 0x72, 0x65, 0x63, 0x75, 0x72, 0x73, 0x69, 0x76, 0x65, 0x42, 0x09, 0x0a, 0x07, + 0x62, 0x79, 0x5f, 0x77, 0x68, 0x61, 0x74, 0x22, 0x37, 0x0a, 0x14, 0x50, 0x75, 0x74, 0x44, 0x69, + 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x1f, 0x0a, 0x0b, 0x72, 0x6f, 0x6f, 0x74, 0x5f, 0x64, 0x69, 0x67, 0x65, 0x73, 0x74, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x72, 0x6f, 0x6f, 0x74, 0x44, 0x69, 0x67, 0x65, 0x73, 0x74, + 0x32, 0xa9, 0x01, 0x0a, 0x10, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x53, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x49, 0x0a, 0x03, 0x47, 0x65, 0x74, 0x12, 0x24, 0x2e, 0x74, + 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x47, + 0x65, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x1a, 0x1a, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, + 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x79, 0x30, 0x01, + 0x12, 0x4a, 0x0a, 0x03, 0x50, 0x75, 0x74, 0x12, 0x1a, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, + 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x79, 0x1a, 0x25, 0x2e, 0x74, 0x76, 0x69, 0x78, 0x2e, 0x63, 0x61, 0x73, 0x74, 0x6f, + 0x72, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x50, 0x75, 0x74, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x28, 0x01, 0x42, 0x2c, 0x5a, 0x2a, + 0x63, 0x6f, 0x64, 0x65, 0x2e, 0x74, 0x76, 0x6c, 0x2e, 0x66, 0x79, 0x69, 0x2f, 0x74, 0x76, 0x69, + 0x78, 0x2f, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x73, + 0x3b, 0x63, 0x61, 0x73, 0x74, 0x6f, 0x72, 0x65, 0x76, 0x31, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_tvix_castore_protos_rpc_directory_proto_rawDescOnce sync.Once + file_tvix_castore_protos_rpc_directory_proto_rawDescData = file_tvix_castore_protos_rpc_directory_proto_rawDesc +) + +func file_tvix_castore_protos_rpc_directory_proto_rawDescGZIP() []byte { + file_tvix_castore_protos_rpc_directory_proto_rawDescOnce.Do(func() { + file_tvix_castore_protos_rpc_directory_proto_rawDescData = protoimpl.X.CompressGZIP(file_tvix_castore_protos_rpc_directory_proto_rawDescData) + }) + return file_tvix_castore_protos_rpc_directory_proto_rawDescData +} + +var file_tvix_castore_protos_rpc_directory_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_tvix_castore_protos_rpc_directory_proto_goTypes = []interface{}{ + (*GetDirectoryRequest)(nil), // 0: tvix.castore.v1.GetDirectoryRequest + (*PutDirectoryResponse)(nil), // 1: tvix.castore.v1.PutDirectoryResponse + (*Directory)(nil), // 2: tvix.castore.v1.Directory +} +var file_tvix_castore_protos_rpc_directory_proto_depIdxs = []int32{ + 0, // 0: tvix.castore.v1.DirectoryService.Get:input_type -> tvix.castore.v1.GetDirectoryRequest + 2, // 1: tvix.castore.v1.DirectoryService.Put:input_type -> tvix.castore.v1.Directory + 2, // 2: tvix.castore.v1.DirectoryService.Get:output_type -> tvix.castore.v1.Directory + 1, // 3: tvix.castore.v1.DirectoryService.Put:output_type -> tvix.castore.v1.PutDirectoryResponse + 2, // [2:4] is the sub-list for method output_type + 0, // [0:2] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_tvix_castore_protos_rpc_directory_proto_init() } +func file_tvix_castore_protos_rpc_directory_proto_init() { + if File_tvix_castore_protos_rpc_directory_proto != nil { + return + } + file_tvix_castore_protos_castore_proto_init() + if !protoimpl.UnsafeEnabled { + file_tvix_castore_protos_rpc_directory_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetDirectoryRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_tvix_castore_protos_rpc_directory_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PutDirectoryResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_tvix_castore_protos_rpc_directory_proto_msgTypes[0].OneofWrappers = []interface{}{ + (*GetDirectoryRequest_Digest)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_tvix_castore_protos_rpc_directory_proto_rawDesc, + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_tvix_castore_protos_rpc_directory_proto_goTypes, + DependencyIndexes: file_tvix_castore_protos_rpc_directory_proto_depIdxs, + MessageInfos: file_tvix_castore_protos_rpc_directory_proto_msgTypes, + }.Build() + File_tvix_castore_protos_rpc_directory_proto = out.File + file_tvix_castore_protos_rpc_directory_proto_rawDesc = nil + file_tvix_castore_protos_rpc_directory_proto_goTypes = nil + file_tvix_castore_protos_rpc_directory_proto_depIdxs = nil +} diff --git a/tvix/castore/protos/rpc_directory.proto b/tvix/castore/protos/rpc_directory.proto new file mode 100644 index 000000000000..8d4c22828547 --- /dev/null +++ b/tvix/castore/protos/rpc_directory.proto @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors +syntax = "proto3"; + +package tvix.castore.v1; + +import "tvix/castore/protos/castore.proto"; + +option go_package = "code.tvl.fyi/tvix/castore/protos;castorev1"; + +service DirectoryService { + // Get retrieves a stream of Directory messages, by using the lookup + // parameters in GetDirectoryRequest. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + rpc Get(GetDirectoryRequest) returns (stream Directory); + + // Put uploads a graph of Directory messages. + // Individual Directory messages need to be send in an order walking up + // from the leaves to the root - a Directory message can only refer to + // Directory messages previously sent in the same stream. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + // We might add a separate method, allowing to send partial graphs at a later + // time, if requiring to send the full graph turns out to be a problem. + rpc Put(stream Directory) returns (PutDirectoryResponse); +} + +message GetDirectoryRequest { + oneof by_what { + // The blake3 hash of the (root) Directory message, serialized in + // protobuf canonical form. + // Keep in mind this can be a subtree of another root. + bytes digest = 1; + } + + // If set to true, recursively resolve all child Directory messages. + // Directory messages SHOULD be streamed in a recursive breadth-first walk, + // but other orders are also fine, as long as Directory messages are only + // sent after they are referred to from previously sent Directory messages. + bool recursive = 2; +} + +message PutDirectoryResponse { + bytes root_digest = 1; +} diff --git a/tvix/castore/protos/rpc_directory_grpc.pb.go b/tvix/castore/protos/rpc_directory_grpc.pb.go new file mode 100644 index 000000000000..f19e457d867b --- /dev/null +++ b/tvix/castore/protos/rpc_directory_grpc.pb.go @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: MIT +// Copyright © 2022 The Tvix Authors + +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.3.0 +// - protoc (unknown) +// source: tvix/castore/protos/rpc_directory.proto + +package castorev1 + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +const ( + DirectoryService_Get_FullMethodName = "/tvix.castore.v1.DirectoryService/Get" + DirectoryService_Put_FullMethodName = "/tvix.castore.v1.DirectoryService/Put" +) + +// DirectoryServiceClient is the client API for DirectoryService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type DirectoryServiceClient interface { + // Get retrieves a stream of Directory messages, by using the lookup + // parameters in GetDirectoryRequest. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + Get(ctx context.Context, in *GetDirectoryRequest, opts ...grpc.CallOption) (DirectoryService_GetClient, error) + // Put uploads a graph of Directory messages. + // Individual Directory messages need to be send in an order walking up + // from the leaves to the root - a Directory message can only refer to + // Directory messages previously sent in the same stream. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + // We might add a separate method, allowing to send partial graphs at a later + // time, if requiring to send the full graph turns out to be a problem. + Put(ctx context.Context, opts ...grpc.CallOption) (DirectoryService_PutClient, error) +} + +type directoryServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewDirectoryServiceClient(cc grpc.ClientConnInterface) DirectoryServiceClient { + return &directoryServiceClient{cc} +} + +func (c *directoryServiceClient) Get(ctx context.Context, in *GetDirectoryRequest, opts ...grpc.CallOption) (DirectoryService_GetClient, error) { + stream, err := c.cc.NewStream(ctx, &DirectoryService_ServiceDesc.Streams[0], DirectoryService_Get_FullMethodName, opts...) + if err != nil { + return nil, err + } + x := &directoryServiceGetClient{stream} + if err := x.ClientStream.SendMsg(in); err != nil { + return nil, err + } + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + return x, nil +} + +type DirectoryService_GetClient interface { + Recv() (*Directory, error) + grpc.ClientStream +} + +type directoryServiceGetClient struct { + grpc.ClientStream +} + +func (x *directoryServiceGetClient) Recv() (*Directory, error) { + m := new(Directory) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +func (c *directoryServiceClient) Put(ctx context.Context, opts ...grpc.CallOption) (DirectoryService_PutClient, error) { + stream, err := c.cc.NewStream(ctx, &DirectoryService_ServiceDesc.Streams[1], DirectoryService_Put_FullMethodName, opts...) + if err != nil { + return nil, err + } + x := &directoryServicePutClient{stream} + return x, nil +} + +type DirectoryService_PutClient interface { + Send(*Directory) error + CloseAndRecv() (*PutDirectoryResponse, error) + grpc.ClientStream +} + +type directoryServicePutClient struct { + grpc.ClientStream +} + +func (x *directoryServicePutClient) Send(m *Directory) error { + return x.ClientStream.SendMsg(m) +} + +func (x *directoryServicePutClient) CloseAndRecv() (*PutDirectoryResponse, error) { + if err := x.ClientStream.CloseSend(); err != nil { + return nil, err + } + m := new(PutDirectoryResponse) + if err := x.ClientStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// DirectoryServiceServer is the server API for DirectoryService service. +// All implementations must embed UnimplementedDirectoryServiceServer +// for forward compatibility +type DirectoryServiceServer interface { + // Get retrieves a stream of Directory messages, by using the lookup + // parameters in GetDirectoryRequest. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + Get(*GetDirectoryRequest, DirectoryService_GetServer) error + // Put uploads a graph of Directory messages. + // Individual Directory messages need to be send in an order walking up + // from the leaves to the root - a Directory message can only refer to + // Directory messages previously sent in the same stream. + // Keep in mind multiple DirectoryNodes in different parts of the graph might + // have the same digest if they have the same underlying contents, + // so sending subsequent ones can be omitted. + // We might add a separate method, allowing to send partial graphs at a later + // time, if requiring to send the full graph turns out to be a problem. + Put(DirectoryService_PutServer) error + mustEmbedUnimplementedDirectoryServiceServer() +} + +// UnimplementedDirectoryServiceServer must be embedded to have forward compatible implementations. +type UnimplementedDirectoryServiceServer struct { +} + +func (UnimplementedDirectoryServiceServer) Get(*GetDirectoryRequest, DirectoryService_GetServer) error { + return status.Errorf(codes.Unimplemented, "method Get not implemented") +} +func (UnimplementedDirectoryServiceServer) Put(DirectoryService_PutServer) error { + return status.Errorf(codes.Unimplemented, "method Put not implemented") +} +func (UnimplementedDirectoryServiceServer) mustEmbedUnimplementedDirectoryServiceServer() {} + +// UnsafeDirectoryServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to DirectoryServiceServer will +// result in compilation errors. +type UnsafeDirectoryServiceServer interface { + mustEmbedUnimplementedDirectoryServiceServer() +} + +func RegisterDirectoryServiceServer(s grpc.ServiceRegistrar, srv DirectoryServiceServer) { + s.RegisterService(&DirectoryService_ServiceDesc, srv) +} + +func _DirectoryService_Get_Handler(srv interface{}, stream grpc.ServerStream) error { + m := new(GetDirectoryRequest) + if err := stream.RecvMsg(m); err != nil { + return err + } + return srv.(DirectoryServiceServer).Get(m, &directoryServiceGetServer{stream}) +} + +type DirectoryService_GetServer interface { + Send(*Directory) error + grpc.ServerStream +} + +type directoryServiceGetServer struct { + grpc.ServerStream +} + +func (x *directoryServiceGetServer) Send(m *Directory) error { + return x.ServerStream.SendMsg(m) +} + +func _DirectoryService_Put_Handler(srv interface{}, stream grpc.ServerStream) error { + return srv.(DirectoryServiceServer).Put(&directoryServicePutServer{stream}) +} + +type DirectoryService_PutServer interface { + SendAndClose(*PutDirectoryResponse) error + Recv() (*Directory, error) + grpc.ServerStream +} + +type directoryServicePutServer struct { + grpc.ServerStream +} + +func (x *directoryServicePutServer) SendAndClose(m *PutDirectoryResponse) error { + return x.ServerStream.SendMsg(m) +} + +func (x *directoryServicePutServer) Recv() (*Directory, error) { + m := new(Directory) + if err := x.ServerStream.RecvMsg(m); err != nil { + return nil, err + } + return m, nil +} + +// DirectoryService_ServiceDesc is the grpc.ServiceDesc for DirectoryService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var DirectoryService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "tvix.castore.v1.DirectoryService", + HandlerType: (*DirectoryServiceServer)(nil), + Methods: []grpc.MethodDesc{}, + Streams: []grpc.StreamDesc{ + { + StreamName: "Get", + Handler: _DirectoryService_Get_Handler, + ServerStreams: true, + }, + { + StreamName: "Put", + Handler: _DirectoryService_Put_Handler, + ClientStreams: true, + }, + }, + Metadata: "tvix/castore/protos/rpc_directory.proto", +} diff --git a/tvix/castore/src/blobservice/from_addr.rs b/tvix/castore/src/blobservice/from_addr.rs new file mode 100644 index 000000000000..2e0a30697d75 --- /dev/null +++ b/tvix/castore/src/blobservice/from_addr.rs @@ -0,0 +1,30 @@ +use std::sync::Arc; +use url::Url; + +use super::{BlobService, GRPCBlobService, MemoryBlobService, SledBlobService}; + +/// Constructs a new instance of a [BlobService] from an URI. +/// +/// The following schemes are supported by the following services: +/// - `memory://` ([MemoryBlobService]) +/// - `sled://` ([SledBlobService]) +/// - `grpc+*://` ([GRPCBlobService]) +/// +/// See their `from_url` methods for more details about their syntax. +pub fn from_addr(uri: &str) -> Result, crate::Error> { + let url = Url::parse(uri) + .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; + + Ok(if url.scheme() == "memory" { + Arc::new(MemoryBlobService::from_url(&url)?) + } else if url.scheme() == "sled" { + Arc::new(SledBlobService::from_url(&url)?) + } else if url.scheme().starts_with("grpc+") { + Arc::new(GRPCBlobService::from_url(&url)?) + } else { + Err(crate::Error::StorageError(format!( + "unknown scheme: {}", + url.scheme() + )))? + }) +} diff --git a/tvix/castore/src/blobservice/grpc.rs b/tvix/castore/src/blobservice/grpc.rs new file mode 100644 index 000000000000..db9d4a9c00f0 --- /dev/null +++ b/tvix/castore/src/blobservice/grpc.rs @@ -0,0 +1,426 @@ +use super::{naive_seeker::NaiveSeeker, BlobReader, BlobService, BlobWriter}; +use crate::{proto, B3Digest}; +use futures::sink::SinkExt; +use futures::TryFutureExt; +use std::{ + collections::VecDeque, + io::{self}, + pin::pin, + task::Poll, +}; +use tokio::io::AsyncWriteExt; +use tokio::{net::UnixStream, task::JoinHandle}; +use tokio_stream::{wrappers::ReceiverStream, StreamExt}; +use tokio_util::{ + io::{CopyToBytes, SinkWriter}, + sync::{PollSendError, PollSender}, +}; +use tonic::{async_trait, transport::Channel, Code, Status}; +use tracing::instrument; + +/// Connects to a (remote) tvix-store BlobService over gRPC. +#[derive(Clone)] +pub struct GRPCBlobService { + /// The internal reference to a gRPC client. + /// Cloning it is cheap, and it internally handles concurrent requests. + grpc_client: proto::blob_service_client::BlobServiceClient, +} + +impl GRPCBlobService { + /// construct a [GRPCBlobService] from a [proto::blob_service_client::BlobServiceClient]. + /// panics if called outside the context of a tokio runtime. + pub fn from_client( + grpc_client: proto::blob_service_client::BlobServiceClient, + ) -> Self { + Self { grpc_client } + } +} + +#[async_trait] +impl BlobService for GRPCBlobService { + /// Constructs a [GRPCBlobService] from the passed [url::Url]: + /// - scheme has to match `grpc+*://`. + /// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + /// - In the case of unix sockets, there must be a path, but may not be a host. + /// - In the case of non-unix sockets, there must be a host, but no path. + fn from_url(url: &url::Url) -> Result { + // Start checking for the scheme to start with grpc+. + match url.scheme().strip_prefix("grpc+") { + None => Err(crate::Error::StorageError("invalid scheme".to_string())), + Some(rest) => { + if rest == "unix" { + if url.host_str().is_some() { + return Err(crate::Error::StorageError( + "host may not be set".to_string(), + )); + } + let path = url.path().to_string(); + let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter + .unwrap() + .connect_with_connector_lazy(tower::service_fn( + move |_: tonic::transport::Uri| UnixStream::connect(path.clone()), + )); + let grpc_client = proto::blob_service_client::BlobServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } else { + // ensure path is empty, not supported with gRPC. + if !url.path().is_empty() { + return Err(crate::Error::StorageError( + "path may not be set".to_string(), + )); + } + + // clone the uri, and drop the grpc+ from the scheme. + // Recreate a new uri with the `grpc+` prefix dropped from the scheme. + // We can't use `url.set_scheme(rest)`, as it disallows + // setting something http(s) that previously wasn't. + let url = { + let url_str = url.to_string(); + let s_stripped = url_str.strip_prefix("grpc+").unwrap(); + url::Url::parse(s_stripped).unwrap() + }; + let channel = tonic::transport::Endpoint::try_from(url.to_string()) + .unwrap() + .connect_lazy(); + + let grpc_client = proto::blob_service_client::BlobServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } + } + } + } + + #[instrument(skip(self, digest), fields(blob.digest=%digest))] + async fn has(&self, digest: &B3Digest) -> Result { + let mut grpc_client = self.grpc_client.clone(); + let resp = grpc_client + .stat(proto::StatBlobRequest { + digest: digest.clone().into(), + }) + .await; + + match resp { + Ok(_blob_meta) => Ok(true), + Err(e) if e.code() == Code::NotFound => Ok(false), + Err(e) => Err(crate::Error::StorageError(e.to_string())), + } + } + + // On success, this returns a Ok(Some(io::Read)), which can be used to read + // the contents of the Blob, identified by the digest. + async fn open_read( + &self, + digest: &B3Digest, + ) -> Result>, crate::Error> { + // Get a new handle to the gRPC client, and copy the digest. + let mut grpc_client = self.grpc_client.clone(); + + // Get a stream of [proto::BlobChunk], or return an error if the blob + // doesn't exist. + let resp = grpc_client + .read(proto::ReadBlobRequest { + digest: digest.clone().into(), + }) + .await; + + // This runs the task to completion, which on success will return a stream. + // On reading from it, we receive individual [proto::BlobChunk], so we + // massage this to a stream of bytes, + // then create an [AsyncRead], which we'll turn into a [io::Read], + // that's returned from the function. + match resp { + Ok(stream) => { + // map the stream of proto::BlobChunk to bytes. + let data_stream = stream.into_inner().map(|x| { + x.map(|x| VecDeque::from(x.data.to_vec())) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) + }); + + // Use StreamReader::new to convert to an AsyncRead. + let data_reader = tokio_util::io::StreamReader::new(data_stream); + + Ok(Some(Box::new(NaiveSeeker::new(data_reader)))) + } + Err(e) if e.code() == Code::NotFound => Ok(None), + Err(e) => Err(crate::Error::StorageError(e.to_string())), + } + } + + /// Returns a BlobWriter, that'll internally wrap each write in a + // [proto::BlobChunk], which is send to the gRPC server. + async fn open_write(&self) -> Box { + let mut grpc_client = self.grpc_client.clone(); + + // set up an mpsc channel passing around Bytes. + let (tx, rx) = tokio::sync::mpsc::channel::(10); + + // bytes arriving on the RX side are wrapped inside a + // [proto::BlobChunk], and a [ReceiverStream] is constructed. + let blobchunk_stream = ReceiverStream::new(rx).map(|x| proto::BlobChunk { data: x }); + + // That receiver stream is used as a stream in the gRPC BlobService.put rpc call. + let task: JoinHandle> = + tokio::spawn(async move { Ok(grpc_client.put(blobchunk_stream).await?.into_inner()) }); + + // The tx part of the channel is converted to a sink of byte chunks. + + // We need to make this a function pointer, not a closure. + fn convert_error(_: PollSendError) -> io::Error { + io::Error::from(io::ErrorKind::BrokenPipe) + } + + let sink = PollSender::new(tx) + .sink_map_err(convert_error as fn(PollSendError) -> io::Error); + // We need to explicitly cast here, otherwise rustc does error with "expected fn pointer, found fn item" + + // … which is turned into an [tokio::io::AsyncWrite]. + let writer = SinkWriter::new(CopyToBytes::new(sink)); + + Box::new(GRPCBlobWriter { + task_and_writer: Some((task, writer)), + digest: None, + }) + } +} + +pub struct GRPCBlobWriter { + /// The task containing the put request, and the inner writer, if we're still writing. + task_and_writer: Option<(JoinHandle>, W)>, + + /// The digest that has been returned, if we successfully closed. + digest: Option, +} + +#[async_trait] +impl BlobWriter for GRPCBlobWriter { + async fn close(&mut self) -> Result { + if self.task_and_writer.is_none() { + // if we're already closed, return the b3 digest, which must exist. + // If it doesn't, we already closed and failed once, and didn't handle the error. + match &self.digest { + Some(digest) => Ok(digest.clone()), + None => Err(crate::Error::StorageError( + "previously closed with error".to_string(), + )), + } + } else { + let (task, mut writer) = self.task_and_writer.take().unwrap(); + + // invoke shutdown, so the inner writer closes its internal tx side of + // the channel. + writer + .shutdown() + .map_err(|e| crate::Error::StorageError(e.to_string())) + .await?; + + // block on the RPC call to return. + // This ensures all chunks are sent out, and have been received by the + // backend. + + match task.await? { + Ok(resp) => { + // return the digest from the response, and store it in self.digest for subsequent closes. + let digest: B3Digest = resp.digest.try_into().map_err(|_| { + crate::Error::StorageError( + "invalid root digest length in response".to_string(), + ) + })?; + self.digest = Some(digest.clone()); + Ok(digest) + } + Err(e) => Err(crate::Error::StorageError(e.to_string())), + } + } + } +} + +impl tokio::io::AsyncWrite for GRPCBlobWriter { + fn poll_write( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &[u8], + ) -> std::task::Poll> { + match &mut self.task_and_writer { + None => Poll::Ready(Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + ))), + Some((_, ref mut writer)) => { + let pinned_writer = pin!(writer); + pinned_writer.poll_write(cx, buf) + } + } + } + + fn poll_flush( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + match &mut self.task_and_writer { + None => Poll::Ready(Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + ))), + Some((_, ref mut writer)) => { + let pinned_writer = pin!(writer); + pinned_writer.poll_flush(cx) + } + } + } + + fn poll_shutdown( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + // TODO(raitobezarius): this might not be a graceful shutdown of the + // channel inside the gRPC connection. + Poll::Ready(Ok(())) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::thread; + + use tempfile::TempDir; + use tokio::net::UnixListener; + use tokio::time; + use tokio_stream::wrappers::UnixListenerStream; + + use crate::blobservice::MemoryBlobService; + use crate::fixtures; + use crate::proto::GRPCBlobServiceWrapper; + + use super::BlobService; + use super::GRPCBlobService; + + /// This uses the wrong scheme + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme for a unix socket. + /// The fact that /path/to/somewhere doesn't exist yet is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_unix_path() { + let url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_ok()); + } + + /// This uses the correct scheme for a unix socket, + /// but sets a host, which is unsupported. + #[tokio::test] + async fn test_invalid_unix_path_with_domain() { + let url = + url::Url::parse("grpc+unix://host.example/path/to/somewhere").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme for a HTTP server. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_http() { + let url = url::Url::parse("grpc+http://localhost").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_ok()); + } + + /// This uses the correct scheme for a HTTPS server. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_https() { + let url = url::Url::parse("grpc+https://localhost").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_ok()); + } + + /// This uses the correct scheme, but also specifies + /// an additional path, which is not supported for gRPC. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_invalid_http_with_path() { + let url = url::Url::parse("grpc+https://localhost/some-path").expect("must parse"); + + assert!(GRPCBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme for a unix socket, and provides a server on the other side. + /// This is not a tokio::test, because spawn two separate tokio runtimes and + // want to have explicit control. + #[test] + fn test_valid_unix_path_ping_pong() { + let tmpdir = TempDir::new().unwrap(); + let path = tmpdir.path().join("daemon"); + + let path_clone = path.clone(); + + // Spin up a server, in a thread far away, which spawns its own tokio runtime, + // and blocks on the task. + thread::spawn(move || { + // Create the runtime + let rt = tokio::runtime::Runtime::new().unwrap(); + + let task = rt.spawn(async { + let uds = UnixListener::bind(path_clone).unwrap(); + let uds_stream = UnixListenerStream::new(uds); + + // spin up a new server + let mut server = tonic::transport::Server::builder(); + let router = + server.add_service(crate::proto::blob_service_server::BlobServiceServer::new( + GRPCBlobServiceWrapper::from( + Arc::new(MemoryBlobService::default()) as Arc + ), + )); + router.serve_with_incoming(uds_stream).await + }); + + rt.block_on(task).unwrap().unwrap(); + }); + + // Now create another tokio runtime which we'll use in the main test code. + let rt = tokio::runtime::Runtime::new().unwrap(); + + let task = rt.spawn(async move { + // wait for the socket to be created + { + let mut socket_created = false; + // TODO: exponential backoff urgently + for _try in 1..20 { + if path.exists() { + socket_created = true; + break; + } + tokio::time::sleep(time::Duration::from_millis(20)).await; + } + + assert!( + socket_created, + "expected socket path to eventually get created, but never happened" + ); + } + + // prepare a client + let client = { + let mut url = + url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse"); + url.set_path(path.to_str().unwrap()); + GRPCBlobService::from_url(&url).expect("must succeed") + }; + + let has = client + .has(&fixtures::BLOB_A_DIGEST) + .await + .expect("must not be err"); + + assert!(!has); + }); + rt.block_on(task).unwrap() + } +} diff --git a/tvix/castore/src/blobservice/memory.rs b/tvix/castore/src/blobservice/memory.rs new file mode 100644 index 000000000000..383127344a17 --- /dev/null +++ b/tvix/castore/src/blobservice/memory.rs @@ -0,0 +1,196 @@ +use std::io::{self, Cursor, Write}; +use std::task::Poll; +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; +use tonic::async_trait; +use tracing::instrument; + +use super::{BlobReader, BlobService, BlobWriter}; +use crate::{B3Digest, Error}; + +#[derive(Clone, Default)] +pub struct MemoryBlobService { + db: Arc>>>, +} + +#[async_trait] +impl BlobService for MemoryBlobService { + /// Constructs a [MemoryBlobService] from the passed [url::Url]: + /// - scheme has to be `memory://` + /// - there may not be a host. + /// - there may not be a path. + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "memory" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() || !url.path().is_empty() { + return Err(crate::Error::StorageError("invalid url".to_string())); + } + + Ok(Self::default()) + } + + #[instrument(skip(self, digest), fields(blob.digest=%digest))] + async fn has(&self, digest: &B3Digest) -> Result { + let db = self.db.read().unwrap(); + Ok(db.contains_key(digest)) + } + + async fn open_read(&self, digest: &B3Digest) -> Result>, Error> { + let db = self.db.read().unwrap(); + + match db.get(digest).map(|x| Cursor::new(x.clone())) { + Some(result) => Ok(Some(Box::new(result))), + None => Ok(None), + } + } + + #[instrument(skip(self))] + async fn open_write(&self) -> Box { + Box::new(MemoryBlobWriter::new(self.db.clone())) + } +} + +pub struct MemoryBlobWriter { + db: Arc>>>, + + /// Contains the buffer Vec and hasher, or None if already closed + writers: Option<(Vec, blake3::Hasher)>, + + /// The digest that has been returned, if we successfully closed. + digest: Option, +} + +impl MemoryBlobWriter { + fn new(db: Arc>>>) -> Self { + Self { + db, + writers: Some((Vec::new(), blake3::Hasher::new())), + digest: None, + } + } +} +impl tokio::io::AsyncWrite for MemoryBlobWriter { + fn poll_write( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + b: &[u8], + ) -> std::task::Poll> { + Poll::Ready(match &mut self.writers { + None => Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + )), + Some((ref mut buf, ref mut hasher)) => { + let bytes_written = buf.write(b)?; + hasher.write(&b[..bytes_written]) + } + }) + } + + fn poll_flush( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Poll::Ready(match self.writers { + None => Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + )), + Some(_) => Ok(()), + }) + } + + fn poll_shutdown( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + // shutdown is "instantaneous", we only write to memory. + Poll::Ready(Ok(())) + } +} + +#[async_trait] +impl BlobWriter for MemoryBlobWriter { + async fn close(&mut self) -> Result { + if self.writers.is_none() { + match &self.digest { + Some(digest) => Ok(digest.clone()), + None => Err(crate::Error::StorageError( + "previously closed with error".to_string(), + )), + } + } else { + let (buf, hasher) = self.writers.take().unwrap(); + + // We know self.hasher is doing blake3 hashing, so this won't fail. + let digest: B3Digest = hasher.finalize().as_bytes().into(); + + // Only insert if the blob doesn't already exist. + let db = self.db.read()?; + if !db.contains_key(&digest) { + // drop the read lock, so we can open for writing. + drop(db); + + // open the database for writing. + let mut db = self.db.write()?; + + // and put buf in there. This will move buf out. + db.insert(digest.clone(), buf); + } + + self.digest = Some(digest.clone()); + + Ok(digest) + } + } +} + +#[cfg(test)] +mod tests { + use super::BlobService; + use super::MemoryBlobService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(MemoryBlobService::from_url(&url).is_err()); + } + + /// This correctly sets the scheme, and doesn't set a path. + #[test] + fn test_valid_scheme() { + let url = url::Url::parse("memory://").expect("must parse"); + + assert!(MemoryBlobService::from_url(&url).is_ok()); + } + + /// This sets the host to `foo` + #[test] + fn test_invalid_host() { + let url = url::Url::parse("memory://foo").expect("must parse"); + + assert!(MemoryBlobService::from_url(&url).is_err()); + } + + /// This has the path "/", which is invalid. + #[test] + fn test_invalid_has_path() { + let url = url::Url::parse("memory:///").expect("must parse"); + + assert!(MemoryBlobService::from_url(&url).is_err()); + } + + /// This has the path "/foo", which is invalid. + #[test] + fn test_invalid_path2() { + let url = url::Url::parse("memory:///foo").expect("must parse"); + + assert!(MemoryBlobService::from_url(&url).is_err()); + } +} diff --git a/tvix/castore/src/blobservice/mod.rs b/tvix/castore/src/blobservice/mod.rs new file mode 100644 index 000000000000..5ecf25ac1337 --- /dev/null +++ b/tvix/castore/src/blobservice/mod.rs @@ -0,0 +1,62 @@ +use std::io; +use tonic::async_trait; + +use crate::{B3Digest, Error}; + +mod from_addr; +mod grpc; +mod memory; +mod naive_seeker; +mod sled; + +#[cfg(test)] +mod tests; + +pub use self::from_addr::from_addr; +pub use self::grpc::GRPCBlobService; +pub use self::memory::MemoryBlobService; +pub use self::sled::SledBlobService; + +/// The base trait all BlobService services need to implement. +/// It provides functions to check whether a given blob exists, +/// a way to get a [io::Read] to a blob, and a method to initiate writing a new +/// Blob, which will return something implmenting io::Write, and providing a +/// close funtion, to finalize a blob and get its digest. +#[async_trait] +pub trait BlobService: Send + Sync { + /// Create a new instance by passing in a connection URL. + /// TODO: check if we want to make this async, instead of lazily connecting + fn from_url(url: &url::Url) -> Result + where + Self: Sized; + + /// Check if the service has the blob, by its content hash. + async fn has(&self, digest: &B3Digest) -> Result; + + /// Request a blob from the store, by its content hash. + async fn open_read(&self, digest: &B3Digest) -> Result>, Error>; + + /// Insert a new blob into the store. Returns a [BlobWriter], which + /// implements [io::Write] and a [BlobWriter::close]. + async fn open_write(&self) -> Box; +} + +/// A [tokio::io::AsyncWrite] that you need to close() afterwards, and get back +/// the digest of the written blob. +#[async_trait] +pub trait BlobWriter: tokio::io::AsyncWrite + Send + Sync + Unpin + 'static { + /// Signal there's no more data to be written, and return the digest of the + /// contents written. + /// + /// Closing a already-closed BlobWriter is a no-op. + async fn close(&mut self) -> Result; +} + +/// A [tokio::io::AsyncRead] that also allows seeking. +pub trait BlobReader: + tokio::io::AsyncRead + tokio::io::AsyncSeek + tokio::io::AsyncBufRead + Send + Unpin + 'static +{ +} + +/// A [`io::Cursor>`] can be used as a BlobReader. +impl BlobReader for io::Cursor> {} diff --git a/tvix/castore/src/blobservice/naive_seeker.rs b/tvix/castore/src/blobservice/naive_seeker.rs new file mode 100644 index 000000000000..e65a82c7f45a --- /dev/null +++ b/tvix/castore/src/blobservice/naive_seeker.rs @@ -0,0 +1,269 @@ +use super::BlobReader; +use pin_project_lite::pin_project; +use std::io; +use std::task::Poll; +use tokio::io::AsyncRead; +use tracing::{debug, instrument}; + +pin_project! { + /// This implements [tokio::io::AsyncSeek] for and [tokio::io::AsyncRead] by + /// simply skipping over some bytes, keeping track of the position. + /// It fails whenever you try to seek backwards. + /// + /// ## Pinning concerns: + /// + /// [NaiveSeeker] is itself pinned by callers, and we do not need to concern + /// ourselves regarding that. + /// + /// Though, its fields as per + /// + /// can be pinned or unpinned. + /// + /// So we need to go over each field and choose our policy carefully. + /// + /// The obvious cases are the bookkeeping integers we keep in the structure, + /// those are private and not shared to anyone, we never build a + /// `Pin<&mut X>` out of them at any point, therefore, we can safely never + /// mark them as pinned. Of course, it is expected that no developer here + /// attempt to `pin!(self.pos)` to pin them because it makes no sense. If + /// they have to become pinned, they should be marked `#[pin]` and we need + /// to discuss it. + /// + /// So the bookkeeping integers are in the right state with respect to their + /// pinning status. The projection should offer direct access. + /// + /// On the `r` field, i.e. a `BufReader`, given that + /// + /// is available, even a `Pin<&mut BufReader>` can be safely moved. + /// + /// The only care we should have regards the internal reader itself, i.e. + /// the `R` instance, see that Tokio decided to `#[pin]` it too: + /// + /// + /// In general, there's no `Unpin` instance for `R: tokio::io::AsyncRead` + /// (see ). + /// + /// Therefore, we could keep it unpinned and pin it in every call site + /// whenever we need to call `poll_*` which can be confusing to the non- + /// expert developer and we have a fair share amount of situations where the + /// [BufReader] instance is naked, i.e. in its `&mut BufReader` + /// form, this is annoying because it could lead to expose the naked `R` + /// internal instance somehow and would produce a risk of making it move + /// unexpectedly. + /// + /// We choose the path of the least resistance as we have no reason to have + /// access to the raw `BufReader` instance, we just `#[pin]` it too and + /// enjoy its `poll_*` safe APIs and push the unpinning concerns to the + /// internal implementations themselves, which studied the question longer + /// than us. + pub struct NaiveSeeker { + #[pin] + r: tokio::io::BufReader, + pos: u64, + bytes_to_skip: u64, + } +} + +impl NaiveSeeker { + pub fn new(r: R) -> Self { + NaiveSeeker { + r: tokio::io::BufReader::new(r), + pos: 0, + bytes_to_skip: 0, + } + } +} + +impl tokio::io::AsyncRead for NaiveSeeker { + fn poll_read( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + buf: &mut tokio::io::ReadBuf<'_>, + ) -> Poll> { + // The amount of data read can be determined by the increase + // in the length of the slice returned by `ReadBuf::filled`. + let filled_before = buf.filled().len(); + let this = self.project(); + let pos: &mut u64 = this.pos; + + match this.r.poll_read(cx, buf) { + Poll::Ready(a) => { + let bytes_read = buf.filled().len() - filled_before; + *pos += bytes_read as u64; + + Poll::Ready(a) + } + Poll::Pending => Poll::Pending, + } + } +} + +impl tokio::io::AsyncBufRead for NaiveSeeker { + fn poll_fill_buf( + self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + self.project().r.poll_fill_buf(cx) + } + + fn consume(self: std::pin::Pin<&mut Self>, amt: usize) { + let this = self.project(); + this.r.consume(amt); + let pos: &mut u64 = this.pos; + *pos += amt as u64; + } +} + +impl tokio::io::AsyncSeek for NaiveSeeker { + #[instrument(skip(self))] + fn start_seek( + self: std::pin::Pin<&mut Self>, + position: std::io::SeekFrom, + ) -> std::io::Result<()> { + let absolute_offset: u64 = match position { + io::SeekFrom::Start(start_offset) => { + if start_offset < self.pos { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + format!("can't seek backwards ({} -> {})", self.pos, start_offset), + )); + } else { + start_offset + } + } + // we don't know the total size, can't support this. + io::SeekFrom::End(_end_offset) => { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + "can't seek from end", + )); + } + io::SeekFrom::Current(relative_offset) => { + if relative_offset < 0 { + return Err(io::Error::new( + io::ErrorKind::Unsupported, + "can't seek backwards relative to current position", + )); + } else { + self.pos + relative_offset as u64 + } + } + }; + + debug!(absolute_offset=?absolute_offset, "seek"); + + // we already know absolute_offset is larger than self.pos + debug_assert!( + absolute_offset >= self.pos, + "absolute_offset {} is larger than self.pos {}", + absolute_offset, + self.pos + ); + + // calculate bytes to skip + *self.project().bytes_to_skip = absolute_offset - self.pos; + + Ok(()) + } + + #[instrument(skip(self))] + fn poll_complete( + mut self: std::pin::Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> Poll> { + if self.bytes_to_skip == 0 { + // return the new position (from the start of the stream) + return Poll::Ready(Ok(self.pos)); + } + + // discard some bytes, until pos is where we want it to be. + // We create a buffer that we'll discard later on. + let mut buf = [0; 1024]; + + // Loop until we've reached the desired seek position. This is done by issuing repeated + // `poll_read` calls. If the data is not available yet, we will yield back to the executor + // and wait to be polled again. + loop { + // calculate the length we want to skip at most, which is either a max + // buffer size, or the number of remaining bytes to read, whatever is + // smaller. + let bytes_to_skip = std::cmp::min(self.bytes_to_skip as usize, buf.len()); + + let mut read_buf = tokio::io::ReadBuf::new(&mut buf[..bytes_to_skip]); + + match self.as_mut().poll_read(cx, &mut read_buf) { + Poll::Ready(_a) => { + let bytes_read = read_buf.filled().len() as u64; + + if bytes_read == 0 { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + format!( + "tried to skip {} bytes, but only was able to skip {} until reaching EOF", + bytes_to_skip, bytes_read + ), + ))); + } + + // calculate bytes to skip + let bytes_to_skip = self.bytes_to_skip - bytes_read; + + *self.as_mut().project().bytes_to_skip = bytes_to_skip; + + if bytes_to_skip == 0 { + return Poll::Ready(Ok(self.pos)); + } + } + Poll::Pending => return Poll::Pending, + }; + } + } +} + +impl BlobReader for NaiveSeeker {} + +#[cfg(test)] +mod tests { + use super::NaiveSeeker; + use std::io::{Cursor, SeekFrom}; + use tokio::io::{AsyncReadExt, AsyncSeekExt}; + + /// This seek requires multiple `poll_read` as we use a 1024 bytes internal + /// buffer when doing the seek. + /// This ensures we don't hang indefinitely. + #[tokio::test] + async fn seek() { + let buf = vec![0u8; 4096]; + let reader = Cursor::new(&buf); + let mut seeker = NaiveSeeker::new(reader); + seeker.seek(SeekFrom::Start(4000)).await.unwrap(); + } + + #[tokio::test] + async fn seek_read() { + let mut buf = vec![0u8; 2048]; + buf.extend_from_slice(&[1u8; 2048]); + buf.extend_from_slice(&[2u8; 2048]); + + let reader = Cursor::new(&buf); + let mut seeker = NaiveSeeker::new(reader); + + let mut read_buf = vec![0u8; 1024]; + seeker.read_exact(&mut read_buf).await.expect("must read"); + assert_eq!(read_buf.as_slice(), &[0u8; 1024]); + + seeker + .seek(SeekFrom::Current(1024)) + .await + .expect("must seek"); + seeker.read_exact(&mut read_buf).await.expect("must read"); + assert_eq!(read_buf.as_slice(), &[1u8; 1024]); + + seeker + .seek(SeekFrom::Start(2 * 2048)) + .await + .expect("must seek"); + seeker.read_exact(&mut read_buf).await.expect("must read"); + assert_eq!(read_buf.as_slice(), &[2u8; 1024]); + } +} diff --git a/tvix/castore/src/blobservice/sled.rs b/tvix/castore/src/blobservice/sled.rs new file mode 100644 index 000000000000..209f0b76fc7a --- /dev/null +++ b/tvix/castore/src/blobservice/sled.rs @@ -0,0 +1,249 @@ +use super::{BlobReader, BlobService, BlobWriter}; +use crate::{B3Digest, Error}; +use std::{ + io::{self, Cursor, Write}, + path::PathBuf, + task::Poll, +}; +use tonic::async_trait; +use tracing::instrument; + +#[derive(Clone)] +pub struct SledBlobService { + db: sled::Db, +} + +impl SledBlobService { + pub fn new(p: PathBuf) -> Result { + let config = sled::Config::default().use_compression(true).path(p); + let db = config.open()?; + + Ok(Self { db }) + } + + pub fn new_temporary() -> Result { + let config = sled::Config::default().temporary(true); + let db = config.open()?; + + Ok(Self { db }) + } +} + +#[async_trait] +impl BlobService for SledBlobService { + /// Constructs a [SledBlobService] from the passed [url::Url]: + /// - scheme has to be `sled://` + /// - there may not be a host. + /// - a path to the sled needs to be provided (which may not be `/`). + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "sled" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() { + return Err(crate::Error::StorageError(format!( + "invalid host: {}", + url.host().unwrap() + ))); + } + + // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary? + if url.path().is_empty() { + Self::new_temporary().map_err(|e| Error::StorageError(e.to_string())) + } else if url.path() == "/" { + Err(crate::Error::StorageError( + "cowardly refusing to open / with sled".to_string(), + )) + } else { + Self::new(url.path().into()).map_err(|e| Error::StorageError(e.to_string())) + } + } + + #[instrument(skip(self), fields(blob.digest=%digest))] + async fn has(&self, digest: &B3Digest) -> Result { + match self.db.contains_key(digest.to_vec()) { + Ok(has) => Ok(has), + Err(e) => Err(Error::StorageError(e.to_string())), + } + } + + #[instrument(skip(self), fields(blob.digest=%digest))] + async fn open_read(&self, digest: &B3Digest) -> Result>, Error> { + match self.db.get(digest.to_vec()) { + Ok(None) => Ok(None), + Ok(Some(data)) => Ok(Some(Box::new(Cursor::new(data[..].to_vec())))), + Err(e) => Err(Error::StorageError(e.to_string())), + } + } + + #[instrument(skip(self))] + async fn open_write(&self) -> Box { + Box::new(SledBlobWriter::new(self.db.clone())) + } +} + +pub struct SledBlobWriter { + db: sled::Db, + + /// Contains the buffer Vec and hasher, or None if already closed + writers: Option<(Vec, blake3::Hasher)>, + + /// The digest that has been returned, if we successfully closed. + digest: Option, +} + +impl SledBlobWriter { + pub fn new(db: sled::Db) -> Self { + Self { + db, + writers: Some((Vec::new(), blake3::Hasher::new())), + digest: None, + } + } +} + +impl tokio::io::AsyncWrite for SledBlobWriter { + fn poll_write( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + b: &[u8], + ) -> std::task::Poll> { + Poll::Ready(match &mut self.writers { + None => Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + )), + Some((ref mut buf, ref mut hasher)) => { + let bytes_written = buf.write(b)?; + hasher.write(&b[..bytes_written]) + } + }) + } + + fn poll_flush( + mut self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + Poll::Ready(match &mut self.writers { + None => Err(io::Error::new( + io::ErrorKind::NotConnected, + "already closed", + )), + Some(_) => Ok(()), + }) + } + + fn poll_shutdown( + self: std::pin::Pin<&mut Self>, + _cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + // shutdown is "instantaneous", we only write to a Vec as buffer. + Poll::Ready(Ok(())) + } +} + +#[async_trait] +impl BlobWriter for SledBlobWriter { + async fn close(&mut self) -> Result { + if self.writers.is_none() { + match &self.digest { + Some(digest) => Ok(digest.clone()), + None => Err(crate::Error::StorageError( + "previously closed with error".to_string(), + )), + } + } else { + let (buf, hasher) = self.writers.take().unwrap(); + + let digest: B3Digest = hasher.finalize().as_bytes().into(); + + // Only insert if the blob doesn't already exist. + if !self.db.contains_key(digest.to_vec()).map_err(|e| { + Error::StorageError(format!("Unable to check if we have blob {}: {}", digest, e)) + })? { + // put buf in there. This will move buf out. + self.db + .insert(digest.to_vec(), buf) + .map_err(|e| Error::StorageError(format!("unable to insert blob: {}", e)))?; + } + + self.digest = Some(digest.clone()); + + Ok(digest) + } + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::BlobService; + use super::SledBlobService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(SledBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and doesn't specify a path (temporary sled). + #[test] + fn test_valid_scheme_temporary() { + let url = url::Url::parse("sled://").expect("must parse"); + + assert!(SledBlobService::from_url(&url).is_ok()); + } + + /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p) + #[test] + fn test_nonexistent_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap()); + + assert!(SledBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and specifies / as path (which should fail + // for obvious reasons) + #[test] + fn test_invalid_path_root() { + let url = url::Url::parse("sled:///").expect("must parse"); + + assert!(SledBlobService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and sets a tempdir as location. + #[test] + fn test_valid_scheme_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledBlobService::from_url(&url).is_ok()); + } + + /// This sets a host, rather than a path, which should fail. + #[test] + fn test_invalid_host() { + let url = url::Url::parse("sled://foo.example").expect("must parse"); + + assert!(SledBlobService::from_url(&url).is_err()); + } + + /// This sets a host AND a valid path, which should fail + #[test] + fn test_invalid_host_and_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledBlobService::from_url(&url).is_err()); + } +} diff --git a/tvix/castore/src/blobservice/tests.rs b/tvix/castore/src/blobservice/tests.rs new file mode 100644 index 000000000000..fe390b537eb8 --- /dev/null +++ b/tvix/castore/src/blobservice/tests.rs @@ -0,0 +1,246 @@ +use std::io; +use std::pin::pin; + +use test_case::test_case; +use tokio::io::AsyncReadExt; +use tokio::io::AsyncSeekExt; + +use super::B3Digest; +use super::BlobService; +use super::MemoryBlobService; +use super::SledBlobService; +use crate::fixtures; + +// TODO: avoid having to define all different services we test against for all functions. +// maybe something like rstest can be used? + +fn gen_memory_blob_service() -> impl BlobService { + MemoryBlobService::default() +} +fn gen_sled_blob_service() -> impl BlobService { + SledBlobService::new_temporary().unwrap() +} + +// TODO: add GRPC blob service here. + +/// Using [BlobService::has] on a non-existing blob should return false +#[test_case(gen_memory_blob_service(); "memory")] +#[test_case(gen_sled_blob_service(); "sled")] +fn has_nonexistent_false(blob_service: impl BlobService) { + tokio::runtime::Runtime::new().unwrap().block_on(async { + assert!(!blob_service + .has(&fixtures::BLOB_A_DIGEST) + .await + .expect("must not fail")); + }) +} + +/// Trying to read a non-existing blob should return a None instead of a reader. +#[test_case(gen_memory_blob_service(); "memory")] +#[test_case(gen_sled_blob_service(); "sled")] +fn not_found_read(blob_service: impl BlobService) { + tokio::runtime::Runtime::new().unwrap().block_on(async { + assert!(blob_service + .open_read(&fixtures::BLOB_A_DIGEST) + .await + .expect("must not fail") + .is_none()) + }) +} + +/// Put a blob in the store, check has, get it back. +/// We test both with small and big blobs. +#[test_case(gen_memory_blob_service(), &fixtures::BLOB_A, &fixtures::BLOB_A_DIGEST; "memory-small")] +#[test_case(gen_sled_blob_service(), &fixtures::BLOB_A, &fixtures::BLOB_A_DIGEST; "sled-small")] +#[test_case(gen_memory_blob_service(), &fixtures::BLOB_B, &fixtures::BLOB_B_DIGEST; "memory-big")] +#[test_case(gen_sled_blob_service(), &fixtures::BLOB_B, &fixtures::BLOB_B_DIGEST; "sled-big")] +fn put_has_get(blob_service: impl BlobService, blob_contents: &[u8], blob_digest: &B3Digest) { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut w = blob_service.open_write().await; + + let l = tokio::io::copy(&mut io::Cursor::new(blob_contents), &mut w) + .await + .expect("copy must succeed"); + assert_eq!( + blob_contents.len(), + l as usize, + "written bytes must match blob length" + ); + + let digest = w.close().await.expect("close must succeed"); + + assert_eq!(*blob_digest, digest, "returned digest must be correct"); + + assert!( + blob_service.has(blob_digest).await.expect("must not fail"), + "blob service should now have the blob" + ); + + let mut r = blob_service + .open_read(blob_digest) + .await + .expect("open_read must succeed") + .expect("must be some"); + + let mut buf: Vec = Vec::new(); + let mut pinned_reader = pin!(r); + let l = tokio::io::copy(&mut pinned_reader, &mut buf) + .await + .expect("copy must succeed"); + // let l = io::copy(&mut r, &mut buf).expect("copy must succeed"); + + assert_eq!( + blob_contents.len(), + l as usize, + "read bytes must match blob length" + ); + + assert_eq!(blob_contents, buf, "read blob contents must match"); + }) +} + +/// Put a blob in the store, and seek inside it a bit. +#[test_case(gen_memory_blob_service(); "memory")] +#[test_case(gen_sled_blob_service(); "sled")] +fn put_seek(blob_service: impl BlobService) { + tokio::runtime::Runtime::new().unwrap().block_on(async { + let mut w = blob_service.open_write().await; + + tokio::io::copy(&mut io::Cursor::new(&fixtures::BLOB_B.to_vec()), &mut w) + .await + .expect("copy must succeed"); + w.close().await.expect("close must succeed"); + + // open a blob for reading + let mut r = blob_service + .open_read(&fixtures::BLOB_B_DIGEST) + .await + .expect("open_read must succeed") + .expect("must be some"); + + let mut pos: u64 = 0; + + // read the first 10 bytes, they must match the data in the fixture. + { + let mut buf = [0; 10]; + r.read_exact(&mut buf).await.expect("must succeed"); + + assert_eq!( + &fixtures::BLOB_B[pos as usize..pos as usize + buf.len()], + buf, + "expected first 10 bytes to match" + ); + + pos += buf.len() as u64; + } + // seek by 0 bytes, using SeekFrom::Start. + let p = r + .seek(io::SeekFrom::Start(pos)) + .await + .expect("must not fail"); + assert_eq!(pos, p); + + // read the next 10 bytes, they must match the data in the fixture. + { + let mut buf = [0; 10]; + r.read_exact(&mut buf).await.expect("must succeed"); + + assert_eq!( + &fixtures::BLOB_B[pos as usize..pos as usize + buf.len()], + buf, + "expected data to match" + ); + + pos += buf.len() as u64; + } + + // seek by 5 bytes, using SeekFrom::Start. + let p = r + .seek(io::SeekFrom::Start(pos + 5)) + .await + .expect("must not fail"); + pos += 5; + assert_eq!(pos, p); + + // read the next 10 bytes, they must match the data in the fixture. + { + let mut buf = [0; 10]; + r.read_exact(&mut buf).await.expect("must succeed"); + + assert_eq!( + &fixtures::BLOB_B[pos as usize..pos as usize + buf.len()], + buf, + "expected data to match" + ); + + pos += buf.len() as u64; + } + + // seek by 12345 bytes, using SeekFrom:: + let p = r + .seek(io::SeekFrom::Current(12345)) + .await + .expect("must not fail"); + pos += 12345; + assert_eq!(pos, p); + + // read the next 10 bytes, they must match the data in the fixture. + { + let mut buf = [0; 10]; + r.read_exact(&mut buf).await.expect("must succeed"); + + assert_eq!( + &fixtures::BLOB_B[pos as usize..pos as usize + buf.len()], + buf, + "expected data to match" + ); + + #[allow(unused_assignments)] + { + pos += buf.len() as u64; + } + } + + // seeking to the end is okay… + let p = r + .seek(io::SeekFrom::Start(fixtures::BLOB_B.len() as u64)) + .await + .expect("must not fail"); + pos = fixtures::BLOB_B.len() as u64; + assert_eq!(pos, p); + + { + // but it returns no more data. + let mut buf: Vec = Vec::new(); + r.read_to_end(&mut buf).await.expect("must not fail"); + assert!(buf.is_empty(), "expected no more data to be read"); + } + + // seeking past the end… + match r + .seek(io::SeekFrom::Start(fixtures::BLOB_B.len() as u64 + 1)) + .await + { + // should either be ok, but then return 0 bytes. + // this matches the behaviour or a Cursor>. + Ok(_pos) => { + let mut buf: Vec = Vec::new(); + r.read_to_end(&mut buf).await.expect("must not fail"); + assert!(buf.is_empty(), "expected no more data to be read"); + } + // or not be okay. + Err(_) => {} + } + + // TODO: this is only broken for the gRPC version + // We expect seeking backwards or relative to the end to fail. + // r.seek(io::SeekFrom::Current(-1)) + // .expect_err("SeekFrom::Current(-1) expected to fail"); + + // r.seek(io::SeekFrom::Start(pos - 1)) + // .expect_err("SeekFrom::Start(pos-1) expected to fail"); + + // r.seek(io::SeekFrom::End(0)) + // .expect_err("SeekFrom::End(_) expected to fail"); + }) +} diff --git a/tvix/castore/src/digests.rs b/tvix/castore/src/digests.rs new file mode 100644 index 000000000000..4df11b389e93 --- /dev/null +++ b/tvix/castore/src/digests.rs @@ -0,0 +1,72 @@ +use bytes::Bytes; +use data_encoding::BASE64; +use thiserror::Error; + +#[derive(PartialEq, Eq, Hash, Debug)] +pub struct B3Digest(Bytes); + +// TODO: allow converting these errors to crate::Error +#[derive(Error, Debug)] +pub enum Error { + #[error("invalid digest length: {0}")] + InvalidDigestLen(usize), +} + +impl B3Digest { + // returns a copy of the inner [Vec]. + pub fn to_vec(&self) -> Vec { + self.0.to_vec() + } +} + +impl From for bytes::Bytes { + fn from(val: B3Digest) -> Self { + val.0 + } +} + +impl TryFrom> for B3Digest { + type Error = Error; + + // constructs a [B3Digest] from a [Vec]. + // Returns an error if the digest has the wrong length. + fn try_from(value: Vec) -> Result { + if value.len() != 32 { + Err(Error::InvalidDigestLen(value.len())) + } else { + Ok(Self(value.into())) + } + } +} + +impl TryFrom for B3Digest { + type Error = Error; + + // constructs a [B3Digest] from a [bytes::Bytes]. + // Returns an error if the digest has the wrong length. + fn try_from(value: bytes::Bytes) -> Result { + if value.len() != 32 { + Err(Error::InvalidDigestLen(value.len())) + } else { + Ok(Self(value)) + } + } +} + +impl From<&[u8; 32]> for B3Digest { + fn from(value: &[u8; 32]) -> Self { + Self(value.to_vec().into()) + } +} + +impl Clone for B3Digest { + fn clone(&self) -> Self { + Self(self.0.to_owned()) + } +} + +impl std::fmt::Display for B3Digest { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "b3:{}", BASE64.encode(&self.0)) + } +} diff --git a/tvix/castore/src/directoryservice/from_addr.rs b/tvix/castore/src/directoryservice/from_addr.rs new file mode 100644 index 000000000000..776cf061096c --- /dev/null +++ b/tvix/castore/src/directoryservice/from_addr.rs @@ -0,0 +1,36 @@ +use std::sync::Arc; +use url::Url; + +use super::{DirectoryService, GRPCDirectoryService, MemoryDirectoryService, SledDirectoryService}; + +/// Constructs a new instance of a [DirectoryService] from an URI. +/// +/// The following URIs are supported: +/// - `memory:` +/// Uses a in-memory implementation. +/// - `sled:` +/// Uses a in-memory sled implementation. +/// - `sled:///absolute/path/to/somewhere` +/// Uses sled, using a path on the disk for persistency. Can be only opened +/// from one process at the same time. +/// - `grpc+unix:///absolute/path/to/somewhere` +/// Connects to a local tvix-store gRPC service via Unix socket. +/// - `grpc+http://host:port`, `grpc+https://host:port` +/// Connects to a (remote) tvix-store gRPC service. +pub fn from_addr(uri: &str) -> Result, crate::Error> { + let url = Url::parse(uri) + .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; + + Ok(if url.scheme() == "memory" { + Arc::new(MemoryDirectoryService::from_url(&url)?) + } else if url.scheme() == "sled" { + Arc::new(SledDirectoryService::from_url(&url)?) + } else if url.scheme().starts_with("grpc+") { + Arc::new(GRPCDirectoryService::from_url(&url)?) + } else { + Err(crate::Error::StorageError(format!( + "unknown scheme: {}", + url.scheme() + )))? + }) +} diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs new file mode 100644 index 000000000000..0f0305341265 --- /dev/null +++ b/tvix/castore/src/directoryservice/grpc.rs @@ -0,0 +1,541 @@ +use std::collections::HashSet; +use std::pin::Pin; + +use super::{DirectoryPutter, DirectoryService}; +use crate::proto::{self, get_directory_request::ByWhat}; +use crate::{B3Digest, Error}; +use async_stream::try_stream; +use futures::Stream; +use tokio::net::UnixStream; +use tokio::spawn; +use tokio::sync::mpsc::UnboundedSender; +use tokio::task::JoinHandle; +use tokio_stream::wrappers::UnboundedReceiverStream; +use tonic::async_trait; +use tonic::Code; +use tonic::{transport::Channel, Status}; +use tracing::{instrument, warn}; + +/// Connects to a (remote) tvix-store DirectoryService over gRPC. +#[derive(Clone)] +pub struct GRPCDirectoryService { + /// The internal reference to a gRPC client. + /// Cloning it is cheap, and it internally handles concurrent requests. + grpc_client: proto::directory_service_client::DirectoryServiceClient, +} + +impl GRPCDirectoryService { + /// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient]. + /// panics if called outside the context of a tokio runtime. + pub fn from_client( + grpc_client: proto::directory_service_client::DirectoryServiceClient, + ) -> Self { + Self { grpc_client } + } +} + +#[async_trait] +impl DirectoryService for GRPCDirectoryService { + /// Constructs a [GRPCDirectoryService] from the passed [url::Url]: + /// - scheme has to match `grpc+*://`. + /// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + /// - In the case of unix sockets, there must be a path, but may not be a host. + /// - In the case of non-unix sockets, there must be a host, but no path. + fn from_url(url: &url::Url) -> Result { + // Start checking for the scheme to start with grpc+. + match url.scheme().strip_prefix("grpc+") { + None => Err(crate::Error::StorageError("invalid scheme".to_string())), + Some(rest) => { + if rest == "unix" { + if url.host_str().is_some() { + return Err(crate::Error::StorageError( + "host may not be set".to_string(), + )); + } + let path = url.path().to_string(); + let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter + .unwrap() + .connect_with_connector_lazy(tower::service_fn( + move |_: tonic::transport::Uri| UnixStream::connect(path.clone()), + )); + let grpc_client = + proto::directory_service_client::DirectoryServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } else { + // ensure path is empty, not supported with gRPC. + if !url.path().is_empty() { + return Err(crate::Error::StorageError( + "path may not be set".to_string(), + )); + } + + // clone the uri, and drop the grpc+ from the scheme. + // Recreate a new uri with the `grpc+` prefix dropped from the scheme. + // We can't use `url.set_scheme(rest)`, as it disallows + // setting something http(s) that previously wasn't. + let url = { + let url_str = url.to_string(); + let s_stripped = url_str.strip_prefix("grpc+").unwrap(); + url::Url::parse(s_stripped).unwrap() + }; + let channel = tonic::transport::Endpoint::try_from(url.to_string()) + .unwrap() + .connect_lazy(); + + let grpc_client = + proto::directory_service_client::DirectoryServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } + } + } + } + + async fn get( + &self, + digest: &B3Digest, + ) -> Result, crate::Error> { + // Get a new handle to the gRPC client, and copy the digest. + let mut grpc_client = self.grpc_client.clone(); + let digest_cpy = digest.clone(); + let message = async move { + let mut s = grpc_client + .get(proto::GetDirectoryRequest { + recursive: false, + by_what: Some(ByWhat::Digest(digest_cpy.into())), + }) + .await? + .into_inner(); + + // Retrieve the first message only, then close the stream (we set recursive to false) + s.message().await + }; + + let digest = digest.clone(); + match message.await { + Ok(Some(directory)) => { + // Validate the retrieved Directory indeed has the + // digest we expect it to have, to detect corruptions. + let actual_digest = directory.digest(); + if actual_digest != digest { + Err(crate::Error::StorageError(format!( + "requested directory with digest {}, but got {}", + digest, actual_digest + ))) + } else if let Err(e) = directory.validate() { + // Validate the Directory itself is valid. + warn!("directory failed validation: {}", e.to_string()); + Err(crate::Error::StorageError(format!( + "directory {} failed validation: {}", + digest, e, + ))) + } else { + Ok(Some(directory)) + } + } + Ok(None) => Ok(None), + Err(e) if e.code() == Code::NotFound => Ok(None), + Err(e) => Err(crate::Error::StorageError(e.to_string())), + } + } + + async fn put(&self, directory: crate::proto::Directory) -> Result { + let mut grpc_client = self.grpc_client.clone(); + + let resp = grpc_client.put(tokio_stream::iter(vec![directory])).await; + + match resp { + Ok(put_directory_resp) => Ok(put_directory_resp + .into_inner() + .root_digest + .try_into() + .map_err(|_| { + Error::StorageError("invalid root digest length in response".to_string()) + })?), + Err(e) => Err(crate::Error::StorageError(e.to_string())), + } + } + + #[instrument(skip_all, fields(directory.digest = %root_directory_digest))] + fn get_recursive( + &self, + root_directory_digest: &B3Digest, + ) -> Pin> + Send>> { + let mut grpc_client = self.grpc_client.clone(); + let root_directory_digest = root_directory_digest.clone(); + + let stream = try_stream! { + let mut stream = grpc_client + .get(proto::GetDirectoryRequest { + recursive: true, + by_what: Some(ByWhat::Digest(root_directory_digest.clone().into())), + }) + .await + .map_err(|e| crate::Error::StorageError(e.to_string()))? + .into_inner(); + + // The Directory digests we received so far + let mut received_directory_digests: HashSet = HashSet::new(); + // The Directory digests we're still expecting to get sent. + let mut expected_directory_digests: HashSet = HashSet::from([root_directory_digest]); + + loop { + match stream.message().await { + Ok(Some(directory)) => { + // validate the directory itself. + if let Err(e) = directory.validate() { + Err(crate::Error::StorageError(format!( + "directory {} failed validation: {}", + directory.digest(), + e, + )))?; + } + // validate we actually expected that directory, and move it from expected to received. + let directory_digest = directory.digest(); + let was_expected = expected_directory_digests.remove(&directory_digest); + if !was_expected { + // FUTUREWORK: dumb clients might send the same stuff twice. + // as a fallback, we might want to tolerate receiving + // it if it's in received_directory_digests (as that + // means it once was in expected_directory_digests) + Err(crate::Error::StorageError(format!( + "received unexpected directory {}", + directory_digest + )))?; + } + received_directory_digests.insert(directory_digest); + + // register all children in expected_directory_digests. + for child_directory in &directory.directories { + // We ran validate() above, so we know these digests must be correct. + let child_directory_digest = + child_directory.digest.clone().try_into().unwrap(); + + expected_directory_digests + .insert(child_directory_digest); + } + + yield directory; + }, + Ok(None) => { + // If we were still expecting something, that's an error. + if !expected_directory_digests.is_empty() { + Err(crate::Error::StorageError(format!( + "still expected {} directories, but got premature end of stream", + expected_directory_digests.len(), + )))? + } else { + return + } + }, + Err(e) => { + Err(crate::Error::StorageError(e.to_string()))?; + }, + } + } + }; + + Box::pin(stream) + } + + #[instrument(skip_all)] + fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + 'static)> + where + Self: Clone, + { + let mut grpc_client = self.grpc_client.clone(); + + let (tx, rx) = tokio::sync::mpsc::unbounded_channel(); + + let task: JoinHandle> = spawn(async move { + let s = grpc_client + .put(UnboundedReceiverStream::new(rx)) + .await? + .into_inner(); + + Ok(s) + }); + + Box::new(GRPCPutter::new(tx, task)) + } +} + +/// Allows uploading multiple Directory messages in the same gRPC stream. +pub struct GRPCPutter { + /// Data about the current request - a handle to the task, and the tx part + /// of the channel. + /// The tx part of the pipe is used to send [proto::Directory] to the ongoing request. + /// The task will yield a [proto::PutDirectoryResponse] once the stream is closed. + #[allow(clippy::type_complexity)] // lol + rq: Option<( + JoinHandle>, + UnboundedSender, + )>, +} + +impl GRPCPutter { + pub fn new( + directory_sender: UnboundedSender, + task: JoinHandle>, + ) -> Self { + Self { + rq: Some((task, directory_sender)), + } + } +} + +#[async_trait] +impl DirectoryPutter for GRPCPutter { + async fn put(&mut self, directory: proto::Directory) -> Result<(), crate::Error> { + match self.rq { + // If we're not already closed, send the directory to directory_sender. + Some((_, ref directory_sender)) => { + if directory_sender.send(directory).is_err() { + // If the channel has been prematurely closed, invoke close (so we can peek at the error code) + // That error code is much more helpful, because it + // contains the error message from the server. + self.close().await?; + } + Ok(()) + } + // If self.close() was already called, we can't put again. + None => Err(Error::StorageError( + "DirectoryPutter already closed".to_string(), + )), + } + } + + /// Closes the stream for sending, and returns the value + async fn close(&mut self) -> Result { + // get self.rq, and replace it with None. + // This ensures we can only close it once. + match std::mem::take(&mut self.rq) { + None => Err(Error::StorageError("already closed".to_string())), + Some((task, directory_sender)) => { + // close directory_sender, so blocking on task will finish. + drop(directory_sender); + + let root_digest = task + .await? + .map_err(|e| Error::StorageError(e.to_string()))? + .root_digest; + + root_digest.try_into().map_err(|_| { + Error::StorageError("invalid root digest length in response".to_string()) + }) + } + } + } + + // allows checking if the tx part of the channel is closed. + fn is_closed(&self) -> bool { + match self.rq { + None => true, + Some((_, ref directory_sender)) => directory_sender.is_closed(), + } + } +} + +#[cfg(test)] +mod tests { + use core::time; + use std::thread; + + use futures::StreamExt; + use tempfile::TempDir; + use tokio::net::{UnixListener, UnixStream}; + use tokio_stream::wrappers::UnixListenerStream; + use tonic::transport::{Endpoint, Server, Uri}; + + use crate::{ + directoryservice::DirectoryService, + fixtures::{DIRECTORY_A, DIRECTORY_B}, + proto, + proto::{directory_service_server::DirectoryServiceServer, GRPCDirectoryServiceWrapper}, + utils::gen_directory_service, + }; + + #[test] + fn test() -> anyhow::Result<()> { + let tmpdir = TempDir::new().unwrap(); + let socket_path = tmpdir.path().join("socket"); + + // Spin up a server, in a thread far away, which spawns its own tokio runtime, + // and blocks on the task. + let socket_path_clone = socket_path.clone(); + thread::spawn(move || { + // Create the runtime + let rt = tokio::runtime::Runtime::new().unwrap(); + // Get a handle from this runtime + let handle = rt.handle(); + + let task = handle.spawn(async { + let uds = UnixListener::bind(socket_path_clone).unwrap(); + let uds_stream = UnixListenerStream::new(uds); + + // spin up a new DirectoryService + let mut server = Server::builder(); + let router = server.add_service(DirectoryServiceServer::new( + GRPCDirectoryServiceWrapper::from(gen_directory_service()), + )); + router.serve_with_incoming(uds_stream).await + }); + + handle.block_on(task) + }); + + // set up the local client runtime. This is similar to what the [tokio:test] macro desugars to. + let tester_runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + + // wait for the socket to be created + { + let mut socket_created = false; + for _try in 1..20 { + if socket_path.exists() { + socket_created = true; + break; + } + std::thread::sleep(time::Duration::from_millis(20)) + } + + assert!( + socket_created, + "expected socket path to eventually get created, but never happened" + ); + } + + tester_runtime.block_on(async move { + // Create a channel, connecting to the uds at socket_path. + // The URI is unused. + let channel = Endpoint::try_from("http://[::]:50051") + .unwrap() + .connect_with_connector_lazy(tower::service_fn(move |_: Uri| { + UnixStream::connect(socket_path.clone()) + })); + + let grpc_client = proto::directory_service_client::DirectoryServiceClient::new(channel); + + // create the GrpcDirectoryService, using the tester_runtime. + let directory_service = super::GRPCDirectoryService::from_client(grpc_client); + + // try to get DIRECTORY_A should return Ok(None) + assert_eq!( + None, + directory_service + .get(&DIRECTORY_A.digest()) + .await + .expect("must not fail") + ); + + // Now upload it + assert_eq!( + DIRECTORY_A.digest(), + directory_service + .put(DIRECTORY_A.clone()) + .await + .expect("must succeed") + ); + + // And retrieve it, compare for equality. + assert_eq!( + DIRECTORY_A.clone(), + directory_service + .get(&DIRECTORY_A.digest()) + .await + .expect("must succeed") + .expect("must be some") + ); + + // Putting DIRECTORY_B alone should fail, because it refers to DIRECTORY_A. + directory_service + .put(DIRECTORY_B.clone()) + .await + .expect_err("must fail"); + + // Putting DIRECTORY_B in a put_multiple will succeed, but the close + // will always fail. + { + let mut handle = directory_service.put_multiple_start(); + handle.put(DIRECTORY_B.clone()).await.expect("must succeed"); + handle.close().await.expect_err("must fail"); + } + + // Uploading A and then B should succeed, and closing should return the digest of B. + let mut handle = directory_service.put_multiple_start(); + handle.put(DIRECTORY_A.clone()).await.expect("must succeed"); + handle.put(DIRECTORY_B.clone()).await.expect("must succeed"); + let digest = handle.close().await.expect("must succeed"); + assert_eq!(DIRECTORY_B.digest(), digest); + + // Now try to retrieve the closure of DIRECTORY_B, which should return B and then A. + let mut directories_it = directory_service.get_recursive(&DIRECTORY_B.digest()); + assert_eq!( + DIRECTORY_B.clone(), + directories_it + .next() + .await + .expect("must be some") + .expect("must succeed") + ); + assert_eq!( + DIRECTORY_A.clone(), + directories_it + .next() + .await + .expect("must be some") + .expect("must succeed") + ); + + // Uploading B and then A should fail, because B refers to A, which + // hasn't been uploaded yet. + // However, the client can burst, so we might not have received the + // error back from the server. + { + let mut handle = directory_service.put_multiple_start(); + // sending out B will always be fine + handle.put(DIRECTORY_B.clone()).await.expect("must succeed"); + + // whether we will be able to put A as well depends on whether we + // already received the error about B. + if handle.put(DIRECTORY_A.clone()).await.is_ok() { + // If we didn't, and this was Ok(_), … + // a subsequent close MUST fail (because it waits for the + // server) + handle.close().await.expect_err("must fail"); + } + } + + // Now we do the same test as before, send B, then A, but wait + // sufficiently enough for the server to have s + // to close us the stream, + // and then assert that uploading anything else via the handle will fail. + { + let mut handle = directory_service.put_multiple_start(); + handle.put(DIRECTORY_B.clone()).await.expect("must succeed"); + + let mut is_closed = false; + for _try in 1..1000 { + if handle.is_closed() { + is_closed = true; + break; + } + tokio::time::sleep(time::Duration::from_millis(10)).await; + } + + assert!( + is_closed, + "expected channel to eventually close, but never happened" + ); + + handle + .put(DIRECTORY_A.clone()) + .await + .expect_err("must fail"); + } + }); + + Ok(()) + } +} diff --git a/tvix/castore/src/directoryservice/memory.rs b/tvix/castore/src/directoryservice/memory.rs new file mode 100644 index 000000000000..ac67c999d01b --- /dev/null +++ b/tvix/castore/src/directoryservice/memory.rs @@ -0,0 +1,149 @@ +use crate::{proto, B3Digest, Error}; +use futures::Stream; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::{Arc, RwLock}; +use tonic::async_trait; +use tracing::{instrument, warn}; + +use super::utils::{traverse_directory, SimplePutter}; +use super::{DirectoryPutter, DirectoryService}; + +#[derive(Clone, Default)] +pub struct MemoryDirectoryService { + db: Arc>>, +} + +#[async_trait] +impl DirectoryService for MemoryDirectoryService { + /// Constructs a [MemoryDirectoryService] from the passed [url::Url]: + /// - scheme has to be `memory://` + /// - there may not be a host. + /// - there may not be a path. + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "memory" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() || !url.path().is_empty() { + return Err(crate::Error::StorageError("invalid url".to_string())); + } + + Ok(Self::default()) + } + + #[instrument(skip(self, digest), fields(directory.digest = %digest))] + async fn get(&self, digest: &B3Digest) -> Result, Error> { + let db = self.db.read()?; + + match db.get(digest) { + // The directory was not found, return + None => Ok(None), + + // The directory was found, try to parse the data as Directory message + Some(directory) => { + // Validate the retrieved Directory indeed has the + // digest we expect it to have, to detect corruptions. + let actual_digest = directory.digest(); + if actual_digest != *digest { + return Err(Error::StorageError(format!( + "requested directory with digest {}, but got {}", + digest, actual_digest + ))); + } + + // Validate the Directory itself is valid. + if let Err(e) = directory.validate() { + warn!("directory failed validation: {}", e.to_string()); + return Err(Error::StorageError(format!( + "directory {} failed validation: {}", + actual_digest, e, + ))); + } + + Ok(Some(directory.clone())) + } + } + } + + #[instrument(skip(self, directory), fields(directory.digest = %directory.digest()))] + async fn put(&self, directory: proto::Directory) -> Result { + let digest = directory.digest(); + + // validate the directory itself. + if let Err(e) = directory.validate() { + return Err(Error::InvalidRequest(format!( + "directory {} failed validation: {}", + digest, e, + ))); + } + + // store it + let mut db = self.db.write()?; + db.insert(digest.clone(), directory); + + Ok(digest) + } + + #[instrument(skip_all, fields(directory.digest = %root_directory_digest))] + fn get_recursive( + &self, + root_directory_digest: &B3Digest, + ) -> Pin> + Send>> { + traverse_directory(self.clone(), root_directory_digest) + } + + #[instrument(skip_all)] + fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + 'static)> + where + Self: Clone, + { + Box::new(SimplePutter::new(self.clone())) + } +} + +#[cfg(test)] +mod tests { + use super::DirectoryService; + use super::MemoryDirectoryService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This correctly sets the scheme, and doesn't set a path. + #[test] + fn test_valid_scheme() { + let url = url::Url::parse("memory://").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_ok()); + } + + /// This sets the host to `foo` + #[test] + fn test_invalid_host() { + let url = url::Url::parse("memory://foo").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This has the path "/", which is invalid. + #[test] + fn test_invalid_has_path() { + let url = url::Url::parse("memory:///").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This has the path "/foo", which is invalid. + #[test] + fn test_invalid_path2() { + let url = url::Url::parse("memory:///foo").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } +} diff --git a/tvix/castore/src/directoryservice/mod.rs b/tvix/castore/src/directoryservice/mod.rs new file mode 100644 index 000000000000..3b26f4baf79b --- /dev/null +++ b/tvix/castore/src/directoryservice/mod.rs @@ -0,0 +1,76 @@ +use crate::{proto, B3Digest, Error}; +use futures::Stream; +use std::pin::Pin; +use tonic::async_trait; + +mod from_addr; +mod grpc; +mod memory; +mod sled; +mod traverse; +mod utils; + +pub use self::from_addr::from_addr; +pub use self::grpc::GRPCDirectoryService; +pub use self::memory::MemoryDirectoryService; +pub use self::sled::SledDirectoryService; +pub use self::traverse::descend_to; + +/// The base trait all Directory services need to implement. +/// This is a simple get and put of [crate::proto::Directory], returning their +/// digest. +#[async_trait] +pub trait DirectoryService: Send + Sync { + /// Create a new instance by passing in a connection URL. + /// TODO: check if we want to make this async, instead of lazily connecting + fn from_url(url: &url::Url) -> Result + where + Self: Sized; + + /// Get looks up a single Directory message by its digest. + /// In case the directory is not found, Ok(None) is returned. + async fn get(&self, digest: &B3Digest) -> Result, Error>; + /// Get uploads a single Directory message, and returns the calculated + /// digest, or an error. + async fn put(&self, directory: proto::Directory) -> Result; + + /// Looks up a closure of [proto::Directory]. + /// Ideally this would be a `impl Stream>`, + /// and we'd be able to add a default implementation for it here, but + /// we can't have that yet. + /// + /// This returns a pinned, boxed stream. The pinning allows for it to be polled easily, + /// and the box allows different underlying stream implementations to be returned since + /// Rust doesn't support this as a generic in traits yet. This is the same thing that + /// [async_trait] generates, but for streams instead of futures. + fn get_recursive( + &self, + root_directory_digest: &B3Digest, + ) -> Pin> + Send>>; + + /// Allows persisting a closure of [proto::Directory], which is a graph of + /// connected Directory messages. + fn put_multiple_start(&self) -> Box; +} + +/// Provides a handle to put a closure of connected [proto::Directory] elements. +/// +/// The consumer can periodically call [DirectoryPutter::put], starting from the +/// leaves. Once the root is reached, [DirectoryPutter::close] can be called to +/// retrieve the root digest (or an error). +#[async_trait] +pub trait DirectoryPutter: Send { + /// Put a individual [proto::Directory] into the store. + /// Error semantics and behaviour is up to the specific implementation of + /// this trait. + /// Due to bursting, the returned error might refer to an object previously + /// sent via `put`. + async fn put(&mut self, directory: proto::Directory) -> Result<(), Error>; + + /// Close the stream, and wait for any errors. + async fn close(&mut self) -> Result; + + /// Return whether the stream is closed or not. + /// Used from some [DirectoryService] implementations only. + fn is_closed(&self) -> bool; +} diff --git a/tvix/castore/src/directoryservice/sled.rs b/tvix/castore/src/directoryservice/sled.rs new file mode 100644 index 000000000000..0dc5496803cb --- /dev/null +++ b/tvix/castore/src/directoryservice/sled.rs @@ -0,0 +1,213 @@ +use crate::directoryservice::DirectoryPutter; +use crate::proto::Directory; +use crate::{proto, B3Digest, Error}; +use futures::Stream; +use prost::Message; +use std::path::PathBuf; +use std::pin::Pin; +use tonic::async_trait; +use tracing::{instrument, warn}; + +use super::utils::{traverse_directory, SimplePutter}; +use super::DirectoryService; + +#[derive(Clone)] +pub struct SledDirectoryService { + db: sled::Db, +} + +impl SledDirectoryService { + pub fn new(p: PathBuf) -> Result { + let config = sled::Config::default().use_compression(true).path(p); + let db = config.open()?; + + Ok(Self { db }) + } + + pub fn new_temporary() -> Result { + let config = sled::Config::default().temporary(true); + let db = config.open()?; + + Ok(Self { db }) + } +} + +#[async_trait] +impl DirectoryService for SledDirectoryService { + /// Constructs a [SledDirectoryService] from the passed [url::Url]: + /// - scheme has to be `sled://` + /// - there may not be a host. + /// - a path to the sled needs to be provided (which may not be `/`). + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "sled" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() { + return Err(crate::Error::StorageError(format!( + "invalid host: {}", + url.host().unwrap() + ))); + } + + // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary? + if url.path().is_empty() { + Self::new_temporary().map_err(|e| Error::StorageError(e.to_string())) + } else if url.path() == "/" { + Err(crate::Error::StorageError( + "cowardly refusing to open / with sled".to_string(), + )) + } else { + Self::new(url.path().into()).map_err(|e| Error::StorageError(e.to_string())) + } + } + + #[instrument(skip(self, digest), fields(directory.digest = %digest))] + async fn get(&self, digest: &B3Digest) -> Result, Error> { + match self.db.get(digest.to_vec()) { + // The directory was not found, return + Ok(None) => Ok(None), + + // The directory was found, try to parse the data as Directory message + Ok(Some(data)) => match Directory::decode(&*data) { + Ok(directory) => { + // Validate the retrieved Directory indeed has the + // digest we expect it to have, to detect corruptions. + let actual_digest = directory.digest(); + if actual_digest != *digest { + return Err(Error::StorageError(format!( + "requested directory with digest {}, but got {}", + digest, actual_digest + ))); + } + + // Validate the Directory itself is valid. + if let Err(e) = directory.validate() { + warn!("directory failed validation: {}", e.to_string()); + return Err(Error::StorageError(format!( + "directory {} failed validation: {}", + actual_digest, e, + ))); + } + + Ok(Some(directory)) + } + Err(e) => { + warn!("unable to parse directory {}: {}", digest, e); + Err(Error::StorageError(e.to_string())) + } + }, + // some storage error? + Err(e) => Err(Error::StorageError(e.to_string())), + } + } + + #[instrument(skip(self, directory), fields(directory.digest = %directory.digest()))] + async fn put(&self, directory: proto::Directory) -> Result { + let digest = directory.digest(); + + // validate the directory itself. + if let Err(e) = directory.validate() { + return Err(Error::InvalidRequest(format!( + "directory {} failed validation: {}", + digest, e, + ))); + } + // store it + let result = self.db.insert(digest.to_vec(), directory.encode_to_vec()); + if let Err(e) = result { + return Err(Error::StorageError(e.to_string())); + } + Ok(digest) + } + + #[instrument(skip_all, fields(directory.digest = %root_directory_digest))] + fn get_recursive( + &self, + root_directory_digest: &B3Digest, + ) -> Pin> + Send + 'static)>> { + traverse_directory(self.clone(), root_directory_digest) + } + + #[instrument(skip_all)] + fn put_multiple_start(&self) -> Box<(dyn DirectoryPutter + 'static)> + where + Self: Clone, + { + Box::new(SimplePutter::new(self.clone())) + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::DirectoryService; + use super::SledDirectoryService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and doesn't specify a path (temporary sled). + #[test] + fn test_valid_scheme_temporary() { + let url = url::Url::parse("sled://").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_ok()); + } + + /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p) + #[test] + fn test_nonexistent_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and specifies / as path (which should fail + // for obvious reasons) + #[test] + fn test_invalid_path_root() { + let url = url::Url::parse("sled:///").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and sets a tempdir as location. + #[test] + fn test_valid_scheme_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_ok()); + } + + /// This sets a host, rather than a path, which should fail. + #[test] + fn test_invalid_host() { + let url = url::Url::parse("sled://foo.example").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This sets a host AND a valid path, which should fail + #[test] + fn test_invalid_host_and_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } +} diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs new file mode 100644 index 000000000000..4f011c963c06 --- /dev/null +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -0,0 +1,228 @@ +use super::DirectoryService; +use crate::{proto::NamedNode, B3Digest, Error}; +use std::{os::unix::ffi::OsStrExt, sync::Arc}; +use tracing::{instrument, warn}; + +/// This descends from a (root) node to the given (sub)path, returning the Node +/// at that path, or none, if there's nothing at that path. +#[instrument(skip(directory_service))] +pub async fn descend_to( + directory_service: Arc, + root_node: crate::proto::node::Node, + path: &std::path::Path, +) -> Result, Error> { + // strip a possible `/` prefix from the path. + let path = { + if path.starts_with("/") { + path.strip_prefix("/").unwrap() + } else { + path + } + }; + + let mut cur_node = root_node; + let mut it = path.components(); + + loop { + match it.next() { + None => { + // the (remaining) path is empty, return the node we're current at. + return Ok(Some(cur_node)); + } + Some(first_component) => { + match cur_node { + crate::proto::node::Node::File(_) | crate::proto::node::Node::Symlink(_) => { + // There's still some path left, but the current node is no directory. + // This means the path doesn't exist, as we can't reach it. + return Ok(None); + } + crate::proto::node::Node::Directory(directory_node) => { + let digest: B3Digest = directory_node.digest.try_into().map_err(|_e| { + Error::StorageError("invalid digest length".to_string()) + })?; + + // fetch the linked node from the directory_service + match directory_service.get(&digest).await? { + // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! + None => { + warn!("directory {} does not exist", digest); + + return Err(Error::StorageError(format!( + "directory {} does not exist", + digest + ))); + } + Some(directory) => { + // look for first_component in the [Directory]. + // FUTUREWORK: as the nodes() iterator returns in a sorted fashion, we + // could stop as soon as e.name is larger than the search string. + let child_node = directory.nodes().find(|n| { + n.get_name() == first_component.as_os_str().as_bytes() + }); + + match child_node { + // child node not found means there's no such element inside the directory. + None => { + return Ok(None); + } + // child node found, return to top-of loop to find the next + // node in the path. + Some(child_node) => { + cur_node = child_node; + } + } + } + } + } + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP}; + use crate::utils::gen_directory_service; + + use super::descend_to; + + #[tokio::test] + async fn test_descend_to() { + let directory_service = gen_directory_service(); + + let mut handle = directory_service.put_multiple_start(); + handle + .put(DIRECTORY_WITH_KEEP.clone()) + .await + .expect("must succeed"); + handle + .put(DIRECTORY_COMPLICATED.clone()) + .await + .expect("must succeed"); + + // construct the node for DIRECTORY_COMPLICATED + let node_directory_complicated = + crate::proto::node::Node::Directory(crate::proto::DirectoryNode { + name: "doesntmatter".into(), + digest: DIRECTORY_COMPLICATED.digest().into(), + size: DIRECTORY_COMPLICATED.size(), + }); + + // construct the node for DIRECTORY_COMPLICATED + let node_directory_with_keep = crate::proto::node::Node::Directory( + DIRECTORY_COMPLICATED.directories.first().unwrap().clone(), + ); + + // construct the node for the .keep file + let node_file_keep = + crate::proto::node::Node::File(DIRECTORY_WITH_KEEP.files.first().unwrap().clone()); + + // traversal to an empty subpath should return the root node. + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from(""), + ) + .await + .expect("must succeed"); + + assert_eq!(Some(node_directory_complicated.clone()), resp); + } + + // traversal to `keep` should return the node for DIRECTORY_WITH_KEEP + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("keep"), + ) + .await + .expect("must succeed"); + + assert_eq!(Some(node_directory_with_keep), resp); + } + + // traversal to `keep/.keep` should return the node for the .keep file + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("keep/.keep"), + ) + .await + .expect("must succeed"); + + assert_eq!(Some(node_file_keep.clone()), resp); + } + + // traversal to `keep/.keep` should return the node for the .keep file + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("/keep/.keep"), + ) + .await + .expect("must succeed"); + + assert_eq!(Some(node_file_keep), resp); + } + + // traversal to `void` should return None (doesn't exist) + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("void"), + ) + .await + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to `void` should return None (doesn't exist) + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("//v/oid"), + ) + .await + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to `keep/.keep/404` should return None (the path can't be + // reached, as keep/.keep already is a file) + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("keep/.keep/foo"), + ) + .await + .expect("must succeed"); + + assert_eq!(None, resp); + } + + // traversal to a subpath of '/' should return the root node. + { + let resp = descend_to( + directory_service.clone(), + node_directory_complicated.clone(), + &PathBuf::from("/"), + ) + .await + .expect("must succeed"); + + assert_eq!(Some(node_directory_complicated), resp); + } + } +} diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs new file mode 100644 index 000000000000..4c5e7cfde37c --- /dev/null +++ b/tvix/castore/src/directoryservice/utils.rs @@ -0,0 +1,140 @@ +use super::DirectoryPutter; +use super::DirectoryService; +use crate::proto; +use crate::B3Digest; +use crate::Error; +use async_stream::stream; +use futures::Stream; +use std::collections::{HashSet, VecDeque}; +use std::pin::Pin; +use tonic::async_trait; +use tracing::warn; + +/// Traverses a [proto::Directory] from the root to the children. +/// +/// This is mostly BFS, but directories are only returned once. +pub fn traverse_directory( + directory_service: DS, + root_directory_digest: &B3Digest, +) -> Pin> + Send>> { + // The list of all directories that still need to be traversed. The next + // element is picked from the front, new elements are enqueued at the + // back. + let mut worklist_directory_digests: VecDeque = + VecDeque::from([root_directory_digest.clone()]); + // The list of directory digests already sent to the consumer. + // We omit sending the same directories multiple times. + let mut sent_directory_digests: HashSet = HashSet::new(); + + let stream = stream! { + while let Some(current_directory_digest) = worklist_directory_digests.pop_front() { + match directory_service.get(¤t_directory_digest).await { + // if it's not there, we have an inconsistent store! + Ok(None) => { + warn!("directory {} does not exist", current_directory_digest); + yield Err(Error::StorageError(format!( + "directory {} does not exist", + current_directory_digest + ))); + } + Err(e) => { + warn!("failed to look up directory"); + yield Err(Error::StorageError(format!( + "unable to look up directory {}: {}", + current_directory_digest, e + ))); + } + + // if we got it + Ok(Some(current_directory)) => { + // validate, we don't want to send invalid directories. + if let Err(e) = current_directory.validate() { + warn!("directory failed validation: {}", e.to_string()); + yield Err(Error::StorageError(format!( + "invalid directory: {}", + current_directory_digest + ))); + } + + // We're about to send this directory, so let's avoid sending it again if a + // descendant has it. + sent_directory_digests.insert(current_directory_digest); + + // enqueue all child directory digests to the work queue, as + // long as they're not part of the worklist or already sent. + // This panics if the digest looks invalid, it's supposed to be checked first. + for child_directory_node in ¤t_directory.directories { + // TODO: propagate error + let child_digest: B3Digest = child_directory_node.digest.clone().try_into().unwrap(); + + if worklist_directory_digests.contains(&child_digest) + || sent_directory_digests.contains(&child_digest) + { + continue; + } + worklist_directory_digests.push_back(child_digest); + } + + yield Ok(current_directory); + } + }; + } + }; + + Box::pin(stream) +} + +/// This is a simple implementation of a Directory uploader. +/// TODO: verify connectivity? Factor out these checks into generic helpers? +pub struct SimplePutter { + directory_service: DS, + last_directory_digest: Option, + closed: bool, +} + +impl SimplePutter { + pub fn new(directory_service: DS) -> Self { + Self { + directory_service, + closed: false, + last_directory_digest: None, + } + } +} + +#[async_trait] +impl DirectoryPutter for SimplePutter { + async fn put(&mut self, directory: proto::Directory) -> Result<(), Error> { + if self.closed { + return Err(Error::StorageError("already closed".to_string())); + } + + let digest = self.directory_service.put(directory).await?; + + // track the last directory digest + self.last_directory_digest = Some(digest); + + Ok(()) + } + + /// We need to be mutable here, as that's the signature of the trait. + async fn close(&mut self) -> Result { + if self.closed { + return Err(Error::StorageError("already closed".to_string())); + } + + match &self.last_directory_digest { + Some(last_digest) => { + self.closed = true; + Ok(last_digest.clone()) + } + None => Err(Error::InvalidRequest( + "no directories sent, can't show root digest".to_string(), + )), + } + } + + fn is_closed(&self) -> bool { + self.closed + } +} diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs new file mode 100644 index 000000000000..3b23f972b045 --- /dev/null +++ b/tvix/castore/src/errors.rs @@ -0,0 +1,45 @@ +use std::sync::PoisonError; +use thiserror::Error; +use tokio::task::JoinError; +use tonic::Status; + +/// Errors related to communication with the store. +#[derive(Debug, Error)] +pub enum Error { + #[error("invalid request: {0}")] + InvalidRequest(String), + + #[error("internal storage error: {0}")] + StorageError(String), +} + +impl From> for Error { + fn from(value: PoisonError) -> Self { + Error::StorageError(value.to_string()) + } +} + +impl From for Error { + fn from(value: JoinError) -> Self { + Error::StorageError(value.to_string()) + } +} + +impl From for Status { + fn from(value: Error) -> Self { + match value { + Error::InvalidRequest(msg) => Status::invalid_argument(msg), + Error::StorageError(msg) => Status::data_loss(format!("storage error: {}", msg)), + } + } +} + +// TODO: this should probably go somewhere else? +impl From for std::io::Error { + fn from(value: Error) -> Self { + match value { + Error::InvalidRequest(msg) => Self::new(std::io::ErrorKind::InvalidInput, msg), + Error::StorageError(msg) => Self::new(std::io::ErrorKind::Other, msg), + } + } +} diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs new file mode 100644 index 000000000000..ed3d1ca6e855 --- /dev/null +++ b/tvix/castore/src/fixtures.rs @@ -0,0 +1,95 @@ +use crate::{ + proto::{self, Directory, DirectoryNode, FileNode, SymlinkNode}, + B3Digest, +}; +use lazy_static::lazy_static; + +pub const HELLOWORLD_BLOB_CONTENTS: &[u8] = b"Hello World!"; +pub const EMPTY_BLOB_CONTENTS: &[u8] = b""; + +lazy_static! { + pub static ref DUMMY_DIGEST: B3Digest = { + let u: &[u8; 32] = &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; + u.into() + }; + pub static ref DUMMY_DIGEST_2: B3Digest = { + let u: &[u8; 32] = &[ + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + ]; + u.into() + }; + pub static ref DUMMY_DATA_1: bytes::Bytes = vec![0x01, 0x02, 0x03].into(); + pub static ref DUMMY_DATA_2: bytes::Bytes = vec![0x04, 0x05].into(); + + pub static ref HELLOWORLD_BLOB_DIGEST: B3Digest = + blake3::hash(HELLOWORLD_BLOB_CONTENTS).as_bytes().into(); + pub static ref EMPTY_BLOB_DIGEST: B3Digest = + blake3::hash(EMPTY_BLOB_CONTENTS).as_bytes().into(); + + // 2 bytes + pub static ref BLOB_A: bytes::Bytes = vec![0x00, 0x01].into(); + pub static ref BLOB_A_DIGEST: B3Digest = blake3::hash(&BLOB_A).as_bytes().into(); + + // 1MB + pub static ref BLOB_B: bytes::Bytes = (0..255).collect::>().repeat(4 * 1024).into(); + pub static ref BLOB_B_DIGEST: B3Digest = blake3::hash(&BLOB_B).as_bytes().into(); + + // Directories + pub static ref DIRECTORY_WITH_KEEP: proto::Directory = proto::Directory { + directories: vec![], + files: vec![FileNode { + name: b".keep".to_vec().into(), + digest: EMPTY_BLOB_DIGEST.clone().into(), + size: 0, + executable: false, + }], + symlinks: vec![], + }; + pub static ref DIRECTORY_COMPLICATED: proto::Directory = proto::Directory { + directories: vec![DirectoryNode { + name: b"keep".to_vec().into(), + digest: DIRECTORY_WITH_KEEP.digest().into(), + size: DIRECTORY_WITH_KEEP.size(), + }], + files: vec![FileNode { + name: b".keep".to_vec().into(), + digest: EMPTY_BLOB_DIGEST.clone().into(), + size: 0, + executable: false, + }], + symlinks: vec![SymlinkNode { + name: b"aa".to_vec().into(), + target: b"/nix/store/somewhereelse".to_vec().into(), + }], + }; + pub static ref DIRECTORY_A: Directory = Directory::default(); + pub static ref DIRECTORY_B: Directory = Directory { + directories: vec![DirectoryNode { + name: b"a".to_vec().into(), + digest: DIRECTORY_A.digest().into(), + size: DIRECTORY_A.size(), + }], + ..Default::default() + }; + pub static ref DIRECTORY_C: Directory = Directory { + directories: vec![ + DirectoryNode { + name: b"a".to_vec().into(), + digest: DIRECTORY_A.digest().into(), + size: DIRECTORY_A.size(), + }, + DirectoryNode { + name: b"a'".to_vec().into(), + digest: DIRECTORY_A.digest().into(), + size: DIRECTORY_A.size(), + } + ], + ..Default::default() + }; +} diff --git a/tvix/castore/src/import.rs b/tvix/castore/src/import.rs new file mode 100644 index 000000000000..92f792bfe704 --- /dev/null +++ b/tvix/castore/src/import.rs @@ -0,0 +1,205 @@ +use crate::blobservice::BlobService; +use crate::directoryservice::DirectoryPutter; +use crate::directoryservice::DirectoryService; +use crate::proto::node::Node; +use crate::proto::Directory; +use crate::proto::DirectoryNode; +use crate::proto::FileNode; +use crate::proto::SymlinkNode; +use crate::Error as CastoreError; +use std::os::unix::ffi::OsStrExt; +use std::sync::Arc; +use std::{ + collections::HashMap, + fmt::Debug, + os::unix::prelude::PermissionsExt, + path::{Path, PathBuf}, +}; +use tracing::instrument; +use walkdir::WalkDir; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("failed to upload directory at {0}: {1}")] + UploadDirectoryError(PathBuf, CastoreError), + + #[error("invalid encoding encountered for entry {0:?}")] + InvalidEncoding(PathBuf), + + #[error("unable to stat {0}: {1}")] + UnableToStat(PathBuf, std::io::Error), + + #[error("unable to open {0}: {1}")] + UnableToOpen(PathBuf, std::io::Error), + + #[error("unable to read {0}: {1}")] + UnableToRead(PathBuf, std::io::Error), +} + +impl From for Error { + fn from(value: CastoreError) -> Self { + match value { + CastoreError::InvalidRequest(_) => panic!("tvix bug"), + CastoreError::StorageError(_) => panic!("error"), + } + } +} + +// This processes a given [walkdir::DirEntry] and returns a +// proto::node::Node, depending on the type of the entry. +// +// If the entry is a file, its contents are uploaded. +// If the entry is a directory, the Directory is uploaded as well. +// For this to work, it relies on the caller to provide the directory object +// with the previously returned (child) nodes. +// +// It assumes entries to be returned in "contents first" order, means this +// will only be called with a directory if all children of it have been +// visited. If the entry is indeed a directory, it'll also upload that +// directory to the store. For this, the so-far-assembled Directory object for +// this path needs to be passed in. +// +// It assumes the caller adds returned nodes to the directories it assembles. +#[instrument(skip_all, fields(entry.file_type=?&entry.file_type(),entry.path=?entry.path()))] +async fn process_entry( + blob_service: Arc, + directory_putter: &mut Box, + entry: &walkdir::DirEntry, + maybe_directory: Option, +) -> Result { + let file_type = entry.file_type(); + + if file_type.is_dir() { + let directory = maybe_directory + .expect("tvix bug: must be called with some directory in the case of directory"); + let directory_digest = directory.digest(); + let directory_size = directory.size(); + + // upload this directory + directory_putter + .put(directory) + .await + .map_err(|e| Error::UploadDirectoryError(entry.path().to_path_buf(), e))?; + + return Ok(Node::Directory(DirectoryNode { + name: entry.file_name().as_bytes().to_owned().into(), + digest: directory_digest.into(), + size: directory_size, + })); + } + + if file_type.is_symlink() { + let target: bytes::Bytes = std::fs::read_link(entry.path()) + .map_err(|e| Error::UnableToStat(entry.path().to_path_buf(), e))? + .as_os_str() + .as_bytes() + .to_owned() + .into(); + + return Ok(Node::Symlink(SymlinkNode { + name: entry.file_name().as_bytes().to_owned().into(), + target, + })); + } + + if file_type.is_file() { + let metadata = entry + .metadata() + .map_err(|e| Error::UnableToStat(entry.path().to_path_buf(), e.into()))?; + + let mut file = tokio::fs::File::open(entry.path()) + .await + .map_err(|e| Error::UnableToOpen(entry.path().to_path_buf(), e))?; + + let mut writer = blob_service.open_write().await; + + if let Err(e) = tokio::io::copy(&mut file, &mut writer).await { + return Err(Error::UnableToRead(entry.path().to_path_buf(), e)); + }; + + let digest = writer.close().await?; + + return Ok(Node::File(FileNode { + name: entry.file_name().as_bytes().to_vec().into(), + digest: digest.into(), + size: metadata.len() as u32, + // If it's executable by the user, it'll become executable. + // This matches nix's dump() function behaviour. + executable: metadata.permissions().mode() & 64 != 0, + })); + } + todo!("handle other types") +} + +/// Ingests the contents at the given path into the tvix store, +/// interacting with a [BlobService] and [DirectoryService]. +/// It returns the root node or an error. +/// +/// It does not follow symlinks at the root, they will be ingested as actual +/// symlinks. +/// +/// It's not interacting with a PathInfoService (from tvix-store), or anything +/// else giving it a "non-content-addressed name". +/// It's up to the caller to possibly register it somewhere (and potentially +/// rename it based on some naming scheme) +#[instrument(skip(blob_service, directory_service), fields(path=?p))] +pub async fn ingest_path + Debug>( + blob_service: Arc, + directory_service: Arc, + p: P, +) -> Result { + let mut directories: HashMap = HashMap::default(); + + // TODO: pass this one instead? + let mut directory_putter = directory_service.put_multiple_start(); + + for entry in WalkDir::new(p) + .follow_links(false) + .follow_root_links(false) + .contents_first(true) + .sort_by_file_name() + { + let entry = entry.unwrap(); + + // process_entry wants an Option in case the entry points to a directory. + // make sure to provide it. + let maybe_directory: Option = { + if entry.file_type().is_dir() { + Some( + directories + .entry(entry.path().to_path_buf()) + .or_default() + .clone(), + ) + } else { + None + } + }; + + let node = process_entry( + blob_service.clone(), + &mut directory_putter, + &entry, + maybe_directory, + ) + .await?; + + if entry.depth() == 0 { + return Ok(node); + } else { + // calculate the parent path, and make sure we register the node there. + // NOTE: entry.depth() > 0 + let parent_path = entry.path().parent().unwrap().to_path_buf(); + + // record node in parent directory, creating a new [proto:Directory] if not there yet. + let parent_directory = directories.entry(parent_path).or_default(); + match node { + Node::Directory(e) => parent_directory.directories.push(e), + Node::File(e) => parent_directory.files.push(e), + Node::Symlink(e) => parent_directory.symlinks.push(e), + } + } + } + // unreachable, we already bailed out before if root doesn't exist. + panic!("tvix bug") +} diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs new file mode 100644 index 000000000000..76490fb7e6ef --- /dev/null +++ b/tvix/castore/src/lib.rs @@ -0,0 +1,15 @@ +mod digests; +mod errors; + +pub mod blobservice; +pub mod directoryservice; +pub mod fixtures; +pub mod import; +pub mod proto; +pub mod utils; + +pub use digests::B3Digest; +pub use errors::Error; + +#[cfg(test)] +mod tests; diff --git a/tvix/castore/src/proto/grpc_blobservice_wrapper.rs b/tvix/castore/src/proto/grpc_blobservice_wrapper.rs new file mode 100644 index 000000000000..93db1deef69a --- /dev/null +++ b/tvix/castore/src/proto/grpc_blobservice_wrapper.rs @@ -0,0 +1,177 @@ +use crate::blobservice::BlobService; +use core::pin::pin; +use futures::TryFutureExt; +use std::{ + collections::VecDeque, + io, + ops::{Deref, DerefMut}, + pin::Pin, + sync::Arc, +}; +use tokio_stream::StreamExt; +use tokio_util::io::ReaderStream; +use tonic::{async_trait, Request, Response, Status, Streaming}; +use tracing::{instrument, warn}; + +pub struct GRPCBlobServiceWrapper { + blob_service: Arc, +} + +impl From> for GRPCBlobServiceWrapper { + fn from(value: Arc) -> Self { + Self { + blob_service: value, + } + } +} + +// This is necessary because bytes::BytesMut comes up with +// a default 64 bytes capacity that cannot be changed +// easily if you assume a bytes::BufMut trait implementation +// Therefore, we override the Default implementation here +// TODO(raitobezarius?): upstream me properly +struct BytesMutWithDefaultCapacity { + inner: bytes::BytesMut, +} + +impl Deref for BytesMutWithDefaultCapacity { + type Target = bytes::BytesMut; + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for BytesMutWithDefaultCapacity { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl Default for BytesMutWithDefaultCapacity { + fn default() -> Self { + BytesMutWithDefaultCapacity { + inner: bytes::BytesMut::with_capacity(N), + } + } +} + +impl bytes::Buf for BytesMutWithDefaultCapacity { + fn remaining(&self) -> usize { + self.inner.remaining() + } + + fn chunk(&self) -> &[u8] { + self.inner.chunk() + } + + fn advance(&mut self, cnt: usize) { + self.inner.advance(cnt); + } +} + +unsafe impl bytes::BufMut for BytesMutWithDefaultCapacity { + fn remaining_mut(&self) -> usize { + self.inner.remaining_mut() + } + + unsafe fn advance_mut(&mut self, cnt: usize) { + self.inner.advance_mut(cnt); + } + + fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice { + self.inner.chunk_mut() + } +} + +#[async_trait] +impl super::blob_service_server::BlobService for GRPCBlobServiceWrapper { + // https://github.com/tokio-rs/tokio/issues/2723#issuecomment-1534723933 + type ReadStream = + Pin> + Send + 'static>>; + + #[instrument(skip(self))] + async fn stat( + &self, + request: Request, + ) -> Result, Status> { + let rq = request.into_inner(); + let req_digest = rq + .digest + .try_into() + .map_err(|_e| Status::invalid_argument("invalid digest length"))?; + + match self.blob_service.has(&req_digest).await { + Ok(true) => Ok(Response::new(super::BlobMeta::default())), + Ok(false) => Err(Status::not_found(format!("blob {} not found", &req_digest))), + Err(e) => Err(e.into()), + } + } + + #[instrument(skip(self))] + async fn read( + &self, + request: Request, + ) -> Result, Status> { + let rq = request.into_inner(); + + let req_digest = rq + .digest + .try_into() + .map_err(|_e| Status::invalid_argument("invalid digest length"))?; + + match self.blob_service.open_read(&req_digest).await { + Ok(Some(reader)) => { + fn stream_mapper( + x: Result, + ) -> Result { + match x { + Ok(bytes) => Ok(super::BlobChunk { data: bytes }), + Err(e) => Err(Status::from(e)), + } + } + + let chunks_stream = ReaderStream::new(reader).map(stream_mapper); + Ok(Response::new(Box::pin(chunks_stream))) + } + Ok(None) => Err(Status::not_found(format!("blob {} not found", &req_digest))), + Err(e) => Err(e.into()), + } + } + + #[instrument(skip(self))] + async fn put( + &self, + request: Request>, + ) -> Result, Status> { + let req_inner = request.into_inner(); + + let data_stream = req_inner.map(|x| { + x.map(|x| VecDeque::from(x.data.to_vec())) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) + }); + + let mut data_reader = tokio_util::io::StreamReader::new(data_stream); + + let mut blob_writer = pin!(self.blob_service.open_write().await); + + tokio::io::copy(&mut data_reader, &mut blob_writer) + .await + .map_err(|e| { + warn!("error copying: {}", e); + Status::internal("error copying") + })?; + + let digest = blob_writer + .close() + .map_err(|e| { + warn!("error closing stream: {}", e); + Status::internal("error closing stream") + }) + .await? + .to_vec(); + + Ok(Response::new(super::PutBlobResponse { + digest: digest.into(), + })) + } +} diff --git a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs new file mode 100644 index 000000000000..5e143a7bd7a8 --- /dev/null +++ b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs @@ -0,0 +1,184 @@ +use crate::proto; +use crate::{directoryservice::DirectoryService, B3Digest}; +use futures::StreamExt; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::{sync::mpsc::channel, task}; +use tokio_stream::wrappers::ReceiverStream; +use tonic::{async_trait, Request, Response, Status, Streaming}; +use tracing::{debug, instrument, warn}; + +pub struct GRPCDirectoryServiceWrapper { + directory_service: Arc, +} + +impl From> for GRPCDirectoryServiceWrapper { + fn from(value: Arc) -> Self { + Self { + directory_service: value, + } + } +} + +#[async_trait] +impl proto::directory_service_server::DirectoryService for GRPCDirectoryServiceWrapper { + type GetStream = ReceiverStream>; + + #[instrument(skip(self))] + async fn get( + &self, + request: Request, + ) -> Result, Status> { + let (tx, rx) = channel(5); + + let req_inner = request.into_inner(); + + let directory_service = self.directory_service.clone(); + + let _task = { + // look at the digest in the request and put it in the top of the queue. + match &req_inner.by_what { + None => return Err(Status::invalid_argument("by_what needs to be specified")), + Some(proto::get_directory_request::ByWhat::Digest(ref digest)) => { + let digest: B3Digest = digest + .clone() + .try_into() + .map_err(|_e| Status::invalid_argument("invalid digest length"))?; + + task::spawn(async move { + if !req_inner.recursive { + let e: Result = + match directory_service.get(&digest).await { + Ok(Some(directory)) => Ok(directory), + Ok(None) => Err(Status::not_found(format!( + "directory {} not found", + digest + ))), + Err(e) => Err(e.into()), + }; + + if tx.send(e).await.is_err() { + debug!("receiver dropped"); + } + } else { + // If recursive was requested, traverse via get_recursive. + let mut directories_it = directory_service.get_recursive(&digest); + + while let Some(e) = directories_it.next().await { + // map err in res from Error to Status + let res = e.map_err(|e| Status::internal(e.to_string())); + if tx.send(res).await.is_err() { + debug!("receiver dropped"); + break; + } + } + } + }); + } + } + }; + + let receiver_stream = ReceiverStream::new(rx); + Ok(Response::new(receiver_stream)) + } + + #[instrument(skip(self, request))] + async fn put( + &self, + request: Request>, + ) -> Result, Status> { + let mut req_inner = request.into_inner(); + // TODO: let this use DirectoryPutter to the store it's connected to, + // and move the validation logic into [SimplePutter]. + + // This keeps track of the seen directory keys, and their size. + // This is used to validate the size field of a reference to a previously sent directory. + // We don't need to keep the contents around, they're stored in the DB. + // https://github.com/rust-lang/rust-clippy/issues/5812 + #[allow(clippy::mutable_key_type)] + let mut seen_directories_sizes: HashMap = HashMap::new(); + let mut last_directory_dgst: Option = None; + + // Consume directories, and insert them into the store. + // Reject directory messages that refer to Directories not sent in the same stream. + while let Some(directory) = req_inner.message().await? { + // validate the directory itself. + if let Err(e) = directory.validate() { + return Err(Status::invalid_argument(format!( + "directory {} failed validation: {}", + directory.digest(), + e, + ))); + } + + // for each child directory this directory refers to, we need + // to ensure it has been seen already in this stream, and that the size + // matches what we recorded. + for child_directory in &directory.directories { + let child_directory_digest: B3Digest = child_directory + .digest + .clone() + .try_into() + .map_err(|_e| Status::internal("invalid child directory digest len"))?; + + match seen_directories_sizes.get(&child_directory_digest) { + None => { + return Err(Status::invalid_argument(format!( + "child directory '{:?}' ({}) in directory '{}' not seen yet", + child_directory.name, + &child_directory_digest, + &directory.digest(), + ))); + } + Some(seen_child_directory_size) => { + if seen_child_directory_size != &child_directory.size { + return Err(Status::invalid_argument(format!( + "child directory '{:?}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}", + child_directory.name, + &child_directory_digest, + &directory.digest(), + seen_child_directory_size, + child_directory.size, + ))); + } + } + } + } + + // NOTE: We can't know if a directory we're receiving actually is + // part of the closure, because we receive directories from the leaf nodes up to + // the root. + // The only thing we could to would be doing a final check when the + // last Directory was received, that all Directories received so far are + // reachable from that (root) node. + + let dgst = directory.digest(); + seen_directories_sizes.insert(dgst.clone(), directory.size()); + last_directory_dgst = Some(dgst.clone()); + + // check if the directory already exists in the database. We can skip + // inserting if it's already there, as that'd be a no-op. + match self.directory_service.get(&dgst).await { + Err(e) => { + warn!("error checking if directory already exists: {}", e); + return Err(e.into()); + } + // skip if already exists + Ok(Some(_)) => {} + // insert if it doesn't already exist + Ok(None) => { + self.directory_service.put(directory).await?; + } + } + } + + // We're done receiving. peek at last_directory_digest and either return the digest, + // or an error, if we received an empty stream. + match last_directory_dgst { + None => Err(Status::invalid_argument("no directories received")), + Some(last_directory_dgst) => Ok(Response::new(proto::PutDirectoryResponse { + root_digest: last_directory_dgst.into(), + })), + } + } +} diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs new file mode 100644 index 000000000000..2a44383fdd85 --- /dev/null +++ b/tvix/castore/src/proto/mod.rs @@ -0,0 +1,279 @@ +#![allow(clippy::derive_partial_eq_without_eq, non_snake_case)] +// https://github.com/hyperium/tonic/issues/1056 +use data_encoding::BASE64; +use std::{collections::HashSet, iter::Peekable}; +use thiserror::Error; + +use prost::Message; + +mod grpc_blobservice_wrapper; +mod grpc_directoryservice_wrapper; + +pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; +pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; + +use crate::B3Digest; + +tonic::include_proto!("tvix.castore.v1"); + +#[cfg(feature = "reflection")] +/// Compiled file descriptors for implementing [gRPC +/// reflection](https://github.com/grpc/grpc/blob/master/doc/server-reflection.md) with e.g. +/// [`tonic_reflection`](https://docs.rs/tonic-reflection). +pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix.castore.v1"); + +#[cfg(test)] +mod tests; + +/// Errors that can occur during the validation of Directory messages. +#[derive(Debug, PartialEq, Eq, Error)] +pub enum ValidateDirectoryError { + /// Elements are not in sorted order + #[error("{} is not sorted", std::str::from_utf8(.0).unwrap_or(&BASE64.encode(.0)))] + WrongSorting(Vec), + /// Multiple elements with the same name encountered + #[error("{0:?} is a duplicate name")] + DuplicateName(Vec), + /// Invalid name encountered + #[error("Invalid name in {0:?}")] + InvalidName(Vec), + /// Invalid digest length encountered + #[error("Invalid Digest length: {0}")] + InvalidDigestLen(usize), +} + +/// Checks a Node name for validity as an intermediate node, and returns an +/// error that's generated from the supplied constructor. +/// +/// We disallow slashes, null bytes, '.', '..' and the empty string. +fn validate_node_name(name: &[u8], err: fn(Vec) -> E) -> Result<(), E> { + if name.is_empty() + || name == b".." + || name == b"." + || name.contains(&0x00) + || name.contains(&b'/') + { + return Err(err(name.to_vec())); + } + Ok(()) +} + +/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] +/// and [node::Node], so we can ask all of them for the name easily. +pub trait NamedNode { + fn get_name(&self) -> &[u8]; +} + +impl NamedNode for &FileNode { + fn get_name(&self) -> &[u8] { + &self.name + } +} + +impl NamedNode for &DirectoryNode { + fn get_name(&self) -> &[u8] { + &self.name + } +} + +impl NamedNode for &SymlinkNode { + fn get_name(&self) -> &[u8] { + &self.name + } +} + +impl NamedNode for node::Node { + fn get_name(&self) -> &[u8] { + match self { + node::Node::File(node_file) => &node_file.name, + node::Node::Directory(node_directory) => &node_directory.name, + node::Node::Symlink(node_symlink) => &node_symlink.name, + } + } +} + +impl node::Node { + /// Returns the node with a new name. + pub fn rename(self, name: bytes::Bytes) -> Self { + match self { + node::Node::Directory(n) => node::Node::Directory(DirectoryNode { name, ..n }), + node::Node::File(n) => node::Node::File(FileNode { name, ..n }), + node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }), + } + } +} + +/// Accepts a name, and a mutable reference to the previous name. +/// If the passed name is larger than the previous one, the reference is updated. +/// If it's not, an error is returned. +fn update_if_lt_prev<'n>( + prev_name: &mut &'n [u8], + name: &'n [u8], +) -> Result<(), ValidateDirectoryError> { + if *name < **prev_name { + return Err(ValidateDirectoryError::WrongSorting(name.to_vec())); + } + *prev_name = name; + Ok(()) +} + +/// Inserts the given name into a HashSet if it's not already in there. +/// If it is, an error is returned. +fn insert_once<'n>( + seen_names: &mut HashSet<&'n [u8]>, + name: &'n [u8], +) -> Result<(), ValidateDirectoryError> { + if seen_names.get(name).is_some() { + return Err(ValidateDirectoryError::DuplicateName(name.to_vec())); + } + seen_names.insert(name); + Ok(()) +} + +impl Directory { + /// The size of a directory is the number of all regular and symlink elements, + /// the number of directory elements, and their size fields. + pub fn size(&self) -> u32 { + self.files.len() as u32 + + self.symlinks.len() as u32 + + self + .directories + .iter() + .fold(0, |acc: u32, e| (acc + 1 + e.size)) + } + + /// Calculates the digest of a Directory, which is the blake3 hash of a + /// Directory protobuf message, serialized in protobuf canonical form. + pub fn digest(&self) -> B3Digest { + let mut hasher = blake3::Hasher::new(); + + hasher + .update(&self.encode_to_vec()) + .finalize() + .as_bytes() + .into() + } + + /// validate checks the directory for invalid data, such as: + /// - violations of name restrictions + /// - invalid digest lengths + /// - not properly sorted lists + /// - duplicate names in the three lists + pub fn validate(&self) -> Result<(), ValidateDirectoryError> { + let mut seen_names: HashSet<&[u8]> = HashSet::new(); + + let mut last_directory_name: &[u8] = b""; + let mut last_file_name: &[u8] = b""; + let mut last_symlink_name: &[u8] = b""; + + // check directories + for directory_node in &self.directories { + validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?; + // ensure the digest has the appropriate size. + if TryInto::::try_into(directory_node.digest.clone()).is_err() { + return Err(ValidateDirectoryError::InvalidDigestLen( + directory_node.digest.len(), + )); + } + + update_if_lt_prev(&mut last_directory_name, &directory_node.name)?; + insert_once(&mut seen_names, &directory_node.name)?; + } + + // check files + for file_node in &self.files { + validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?; + if TryInto::::try_into(file_node.digest.clone()).is_err() { + return Err(ValidateDirectoryError::InvalidDigestLen( + file_node.digest.len(), + )); + } + + update_if_lt_prev(&mut last_file_name, &file_node.name)?; + insert_once(&mut seen_names, &file_node.name)?; + } + + // check symlinks + for symlink_node in &self.symlinks { + validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?; + + update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?; + insert_once(&mut seen_names, &symlink_node.name)?; + } + + Ok(()) + } + + /// Allows iterating over all three nodes ([DirectoryNode], [FileNode], + /// [SymlinkNode]) in an ordered fashion, as long as the individual lists + /// are sorted (which can be checked by the [Directory::validate]). + pub fn nodes(&self) -> DirectoryNodesIterator { + return DirectoryNodesIterator { + i_directories: self.directories.iter().peekable(), + i_files: self.files.iter().peekable(), + i_symlinks: self.symlinks.iter().peekable(), + }; + } +} + +/// Struct to hold the state of an iterator over all nodes of a Directory. +/// +/// Internally, this keeps peekable Iterators over all three lists of a +/// Directory message. +pub struct DirectoryNodesIterator<'a> { + // directory: &Directory, + i_directories: Peekable>, + i_files: Peekable>, + i_symlinks: Peekable>, +} + +/// looks at two elements implementing NamedNode, and returns true if "left +/// is smaller / comes first". +/// +/// Some(_) is preferred over None. +fn left_name_lt_right(left: Option<&A>, right: Option<&B>) -> bool { + match left { + // if left is None, right always wins + None => false, + Some(left_inner) => { + // left is Some. + match right { + // left is Some, right is None - left wins. + None => true, + Some(right_inner) => { + // both are Some - compare the name. + return left_inner.get_name() < right_inner.get_name(); + } + } + } + } +} + +impl Iterator for DirectoryNodesIterator<'_> { + type Item = node::Node; + + // next returns the next node in the Directory. + // we peek at all three internal iterators, and pick the one with the + // smallest name, to ensure lexicographical ordering. + // The individual lists are already known to be sorted. + fn next(&mut self) -> Option { + if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) { + // i_directories is still in the game, compare with symlinks + if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) { + self.i_directories + .next() + .cloned() + .map(node::Node::Directory) + } else { + self.i_symlinks.next().cloned().map(node::Node::Symlink) + } + } else { + // i_files is still in the game, compare with symlinks + if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) { + self.i_files.next().cloned().map(node::Node::File) + } else { + self.i_symlinks.next().cloned().map(node::Node::Symlink) + } + } + } +} diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs new file mode 100644 index 000000000000..eed49b2b593c --- /dev/null +++ b/tvix/castore/src/proto/tests/directory.rs @@ -0,0 +1,287 @@ +use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError}; +use lazy_static::lazy_static; + +lazy_static! { + static ref DUMMY_DIGEST: [u8; 32] = [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, + ]; +} +#[test] +fn size() { + { + let d = Directory::default(); + assert_eq!(d.size(), 0); + } + { + let d = Directory { + directories: vec![DirectoryNode { + name: "foo".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 0, + }], + ..Default::default() + }; + assert_eq!(d.size(), 1); + } + { + let d = Directory { + directories: vec![DirectoryNode { + name: "foo".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 4, + }], + ..Default::default() + }; + assert_eq!(d.size(), 5); + } + { + let d = Directory { + files: vec![FileNode { + name: "foo".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + executable: false, + }], + ..Default::default() + }; + assert_eq!(d.size(), 1); + } + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: "foo".into(), + target: "bar".into(), + }], + ..Default::default() + }; + assert_eq!(d.size(), 1); + } +} + +#[test] +fn digest() { + let d = Directory::default(); + + assert_eq!( + d.digest(), + vec![ + 0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc, + 0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca, + 0xe4, 0x1f, 0x32, 0x62 + ] + .try_into() + .unwrap() + ) +} + +#[test] +fn validate_empty() { + let d = Directory::default(); + assert_eq!(d.validate(), Ok(())); +} + +#[test] +fn validate_invalid_names() { + { + let d = Directory { + directories: vec![DirectoryNode { + name: "".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, b"") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + directories: vec![DirectoryNode { + name: ".".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, b".") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + files: vec![FileNode { + name: "..".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + executable: false, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, b"..") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: "\x00".into(), + target: "foo".into(), + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, b"\x00") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: "foo/bar".into(), + target: "foo".into(), + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, b"foo/bar") + } + _ => panic!("unexpected error"), + }; + } +} + +#[test] +fn validate_invalid_digest() { + let d = Directory { + directories: vec![DirectoryNode { + name: "foo".into(), + digest: vec![0x00, 0x42].into(), // invalid length + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidDigestLen(n) => { + assert_eq!(n, 2) + } + _ => panic!("unexpected error"), + } +} + +#[test] +fn validate_sorting() { + // "b" comes before "a", bad. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "b".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + DirectoryNode { + name: "a".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + ], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::WrongSorting(s) => { + assert_eq!(s, b"a"); + } + _ => panic!("unexpected error"), + } + } + + // "a" exists twice, bad. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "a".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + DirectoryNode { + name: "a".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + ], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::DuplicateName(s) => { + assert_eq!(s, b"a"); + } + _ => panic!("unexpected error"), + } + } + + // "a" comes before "b", all good. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "a".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + DirectoryNode { + name: "b".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + ], + ..Default::default() + }; + + d.validate().expect("validate shouldn't error"); + } + + // [b, c] and [a] are both properly sorted. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "b".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + DirectoryNode { + name: "c".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }, + ], + symlinks: vec![SymlinkNode { + name: "a".into(), + target: "foo".into(), + }], + ..Default::default() + }; + + d.validate().expect("validate shouldn't error"); + } +} diff --git a/tvix/castore/src/proto/tests/directory_nodes_iterator.rs b/tvix/castore/src/proto/tests/directory_nodes_iterator.rs new file mode 100644 index 000000000000..68f147a33210 --- /dev/null +++ b/tvix/castore/src/proto/tests/directory_nodes_iterator.rs @@ -0,0 +1,78 @@ +use crate::proto::Directory; +use crate::proto::DirectoryNode; +use crate::proto::FileNode; +use crate::proto::NamedNode; +use crate::proto::SymlinkNode; + +#[test] +fn iterator() { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "c".into(), + ..DirectoryNode::default() + }, + DirectoryNode { + name: "d".into(), + ..DirectoryNode::default() + }, + DirectoryNode { + name: "h".into(), + ..DirectoryNode::default() + }, + DirectoryNode { + name: "l".into(), + ..DirectoryNode::default() + }, + ], + files: vec![ + FileNode { + name: "b".into(), + ..FileNode::default() + }, + FileNode { + name: "e".into(), + ..FileNode::default() + }, + FileNode { + name: "g".into(), + ..FileNode::default() + }, + FileNode { + name: "j".into(), + ..FileNode::default() + }, + ], + symlinks: vec![ + SymlinkNode { + name: "a".into(), + ..SymlinkNode::default() + }, + SymlinkNode { + name: "f".into(), + ..SymlinkNode::default() + }, + SymlinkNode { + name: "i".into(), + ..SymlinkNode::default() + }, + SymlinkNode { + name: "k".into(), + ..SymlinkNode::default() + }, + ], + }; + + // We keep this strings here and convert to string to make the comparison + // less messy. + let mut node_names: Vec = vec![]; + + for node in d.nodes() { + node_names.push(String::from_utf8(node.get_name().to_vec()).unwrap()); + } + + assert_eq!( + vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"], + node_names + ); +} diff --git a/tvix/castore/src/proto/tests/grpc_blobservice.rs b/tvix/castore/src/proto/tests/grpc_blobservice.rs new file mode 100644 index 000000000000..0d7b340b4409 --- /dev/null +++ b/tvix/castore/src/proto/tests/grpc_blobservice.rs @@ -0,0 +1,100 @@ +use crate::fixtures::{BLOB_A, BLOB_A_DIGEST}; +use crate::proto::blob_service_server::BlobService as GRPCBlobService; +use crate::proto::{BlobChunk, GRPCBlobServiceWrapper, ReadBlobRequest, StatBlobRequest}; +use crate::utils::gen_blob_service; +use tokio_stream::StreamExt; + +fn gen_grpc_blob_service() -> GRPCBlobServiceWrapper { + let blob_service = gen_blob_service(); + GRPCBlobServiceWrapper::from(blob_service) +} + +/// Trying to read a non-existent blob should return a not found error. +#[tokio::test] +async fn not_found_read() { + let service = gen_grpc_blob_service(); + + let resp = service + .read(tonic::Request::new(ReadBlobRequest { + digest: BLOB_A_DIGEST.clone().into(), + })) + .await; + + // We can't use unwrap_err here, because the Ok value doesn't implement + // debug. + if let Err(e) = resp { + assert_eq!(e.code(), tonic::Code::NotFound); + } else { + panic!("resp is not err") + } +} + +/// Trying to stat a non-existent blob should return a not found error. +#[tokio::test] +async fn not_found_stat() { + let service = gen_grpc_blob_service(); + + let resp = service + .stat(tonic::Request::new(StatBlobRequest { + digest: BLOB_A_DIGEST.clone().into(), + ..Default::default() + })) + .await + .expect_err("must fail"); + + // The resp should be a status with Code::NotFound + assert_eq!(resp.code(), tonic::Code::NotFound); +} + +/// Put a blob in the store, get it back. +#[tokio::test] +async fn put_read_stat() { + let service = gen_grpc_blob_service(); + + // Send blob A. + let put_resp = service + .put(tonic_mock::streaming_request(vec![BlobChunk { + data: BLOB_A.clone(), + }])) + .await + .expect("must succeed") + .into_inner(); + + assert_eq!(BLOB_A_DIGEST.to_vec(), put_resp.digest); + + // Stat for the digest of A. + // We currently don't ask for more granular chunking data, as we don't + // expose it yet. + let _resp = service + .stat(tonic::Request::new(StatBlobRequest { + digest: BLOB_A_DIGEST.clone().into(), + ..Default::default() + })) + .await + .expect("must succeed") + .into_inner(); + + // Read the blob. It should return the same data. + let resp = service + .read(tonic::Request::new(ReadBlobRequest { + digest: BLOB_A_DIGEST.clone().into(), + })) + .await; + + let mut rx = resp.ok().unwrap().into_inner(); + + // the stream should contain one element, a BlobChunk with the same contents as BLOB_A. + let item = rx + .next() + .await + .expect("must be some") + .expect("must succeed"); + + assert_eq!(BLOB_A.clone(), item.data); + + // … and no more elements + assert!(rx.next().await.is_none()); + + // TODO: we rely here on the blob being small enough to not get broken up into multiple chunks. + // Test with some bigger blob too +} diff --git a/tvix/castore/src/proto/tests/grpc_directoryservice.rs b/tvix/castore/src/proto/tests/grpc_directoryservice.rs new file mode 100644 index 000000000000..6e8cf1e4a7a4 --- /dev/null +++ b/tvix/castore/src/proto/tests/grpc_directoryservice.rs @@ -0,0 +1,239 @@ +use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C}; +use crate::proto::directory_service_server::DirectoryService as GRPCDirectoryService; +use crate::proto::get_directory_request::ByWhat; +use crate::proto::{Directory, DirectoryNode, SymlinkNode}; +use crate::proto::{GRPCDirectoryServiceWrapper, GetDirectoryRequest}; +use crate::utils::gen_directory_service; +use tokio_stream::StreamExt; +use tonic::Status; + +fn gen_grpc_service() -> GRPCDirectoryServiceWrapper { + let directory_service = gen_directory_service(); + GRPCDirectoryServiceWrapper::from(directory_service) +} + +/// Send the specified GetDirectoryRequest. +/// Returns an error in the case of an error response, or an error in one of +// the items in the stream, or a Vec in the case of a successful +/// request. +async fn get_directories( + svc: &S, + get_directory_request: GetDirectoryRequest, +) -> Result, Status> { + let resp = svc.get(tonic::Request::new(get_directory_request)).await; + + // if the response is an error itself, return the error, otherwise unpack + let stream = match resp { + Ok(resp) => resp, + Err(status) => return Err(status), + } + .into_inner(); + + let directory_results: Vec> = stream.collect().await; + + // turn Vec into Result,Status> + directory_results.into_iter().collect() +} + +/// Trying to get a non-existent Directory should return a not found error. +#[tokio::test] +async fn not_found() { + let service = gen_grpc_service(); + + let resp = service + .get(tonic::Request::new(GetDirectoryRequest { + by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().into())), + ..Default::default() + })) + .await; + + let mut rx = resp.expect("must succeed").into_inner().into_inner(); + + // The stream should contain one element, an error with Code::NotFound. + let item = rx + .recv() + .await + .expect("must be some") + .expect_err("must be err"); + assert_eq!(item.code(), tonic::Code::NotFound); + + // … and nothing else + assert!(rx.recv().await.is_none()); +} + +/// Put a Directory into the store, get it back. +#[tokio::test] +async fn put_get() { + let service = gen_grpc_service(); + + let streaming_request = tonic_mock::streaming_request(vec![DIRECTORY_A.clone()]); + let put_resp = service + .put(streaming_request) + .await + .expect("must succeed") + .into_inner(); + + // the sent root_digest should match the calculated digest + assert_eq!(put_resp.root_digest, DIRECTORY_A.digest().to_vec()); + + // get it back + let items = get_directories( + &service, + GetDirectoryRequest { + by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().into())), + ..Default::default() + }, + ) + .await + .expect("must not error"); + + assert_eq!(vec![DIRECTORY_A.clone()], items); +} + +/// Put multiple Directories into the store, and get them back +#[tokio::test] +async fn put_get_multiple() { + let service = gen_grpc_service(); + + // sending "b" (which refers to "a") without sending "a" first should fail. + let put_resp = service + .put(tonic_mock::streaming_request(vec![DIRECTORY_B.clone()])) + .await + .expect_err("must fail"); + + assert_eq!(tonic::Code::InvalidArgument, put_resp.code()); + + // sending "a", then "b" should succeed, and the response should contain the digest of b. + let put_resp = service + .put(tonic_mock::streaming_request(vec![ + DIRECTORY_A.clone(), + DIRECTORY_B.clone(), + ])) + .await + .expect("must succeed"); + + assert_eq!( + DIRECTORY_B.digest().to_vec(), + put_resp.into_inner().root_digest + ); + + // now, request b, first in non-recursive mode. + let items = get_directories( + &service, + GetDirectoryRequest { + recursive: false, + by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().into())), + }, + ) + .await + .expect("must not error"); + + // We expect to only get b. + assert_eq!(vec![DIRECTORY_B.clone()], items); + + // now, request b, but in recursive mode. + let items = get_directories( + &service, + GetDirectoryRequest { + recursive: true, + by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().into())), + }, + ) + .await + .expect("must not error"); + + // We expect to get b, and then a, because that's how we traverse down. + assert_eq!(vec![DIRECTORY_B.clone(), DIRECTORY_A.clone()], items); +} + +/// Put multiple Directories into the store, and omit duplicates. +#[tokio::test] +async fn put_get_dedup() { + let service = gen_grpc_service(); + + // Send "A", then "C", which refers to "A" two times + // Pretend we're a dumb client sending A twice. + let put_resp = service + .put(tonic_mock::streaming_request(vec![ + DIRECTORY_A.clone(), + DIRECTORY_A.clone(), + DIRECTORY_C.clone(), + ])) + .await + .expect("must succeed"); + + assert_eq!( + DIRECTORY_C.digest().to_vec(), + put_resp.into_inner().root_digest + ); + + // Ask for "C" recursively. We expect to only get "A" once, as there's no point sending it twice. + let items = get_directories( + &service, + GetDirectoryRequest { + recursive: true, + by_what: Some(ByWhat::Digest(DIRECTORY_C.digest().into())), + }, + ) + .await + .expect("must not error"); + + // We expect to get C, and then A (once, as the second A has been deduplicated). + assert_eq!(vec![DIRECTORY_C.clone(), DIRECTORY_A.clone()], items); +} + +/// Trying to upload a Directory failing validation should fail. +#[tokio::test] +async fn put_reject_failed_validation() { + let service = gen_grpc_service(); + + // construct a broken Directory message that fails validation + let broken_directory = Directory { + symlinks: vec![SymlinkNode { + name: "".into(), + target: "doesntmatter".into(), + }], + ..Default::default() + }; + assert!(broken_directory.validate().is_err()); + + // send it over, it must fail + let put_resp = service + .put(tonic_mock::streaming_request(vec![broken_directory])) + .await + .expect_err("must fail"); + + assert_eq!(put_resp.code(), tonic::Code::InvalidArgument); +} + +/// Trying to upload a Directory with wrong size should fail. +#[tokio::test] +async fn put_reject_wrong_size() { + let service = gen_grpc_service(); + + // Construct a directory referring to DIRECTORY_A, but with wrong size. + let broken_parent_directory = Directory { + directories: vec![DirectoryNode { + name: "foo".into(), + digest: DIRECTORY_A.digest().into(), + size: 42, + }], + ..Default::default() + }; + // Make sure we got the size wrong. + assert_ne!( + broken_parent_directory.directories[0].size, + DIRECTORY_A.size() + ); + + // now upload both (first A, then the broken parent). This must fail. + let put_resp = service + .put(tonic_mock::streaming_request(vec![ + DIRECTORY_A.clone(), + broken_parent_directory, + ])) + .await + .expect_err("must fail"); + + assert_eq!(put_resp.code(), tonic::Code::InvalidArgument); +} diff --git a/tvix/castore/src/proto/tests/mod.rs b/tvix/castore/src/proto/tests/mod.rs new file mode 100644 index 000000000000..8b62fadeb5a6 --- /dev/null +++ b/tvix/castore/src/proto/tests/mod.rs @@ -0,0 +1,4 @@ +mod directory; +mod directory_nodes_iterator; +mod grpc_blobservice; +mod grpc_directoryservice; diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs new file mode 100644 index 000000000000..77ed6d21c07a --- /dev/null +++ b/tvix/castore/src/tests/import.rs @@ -0,0 +1,124 @@ +use crate::fixtures::*; +use crate::import::ingest_path; +use crate::proto; +use crate::utils::{gen_blob_service, gen_directory_service}; +use tempfile::TempDir; + +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStrExt; + +#[cfg(target_family = "unix")] +#[tokio::test] +async fn symlink() { + let tmpdir = TempDir::new().unwrap(); + + std::fs::create_dir_all(&tmpdir).unwrap(); + std::os::unix::fs::symlink( + "/nix/store/somewhereelse", + tmpdir.path().join("doesntmatter"), + ) + .unwrap(); + + let root_node = ingest_path( + gen_blob_service(), + gen_directory_service(), + tmpdir.path().join("doesntmatter"), + ) + .await + .expect("must succeed"); + + assert_eq!( + proto::node::Node::Symlink(proto::SymlinkNode { + name: "doesntmatter".into(), + target: "/nix/store/somewhereelse".into(), + }), + root_node, + ) +} + +#[tokio::test] +async fn single_file() { + let tmpdir = TempDir::new().unwrap(); + + std::fs::write(tmpdir.path().join("root"), HELLOWORLD_BLOB_CONTENTS).unwrap(); + + let blob_service = gen_blob_service(); + + let root_node = ingest_path( + blob_service.clone(), + gen_directory_service(), + tmpdir.path().join("root"), + ) + .await + .expect("must succeed"); + + assert_eq!( + proto::node::Node::File(proto::FileNode { + name: "root".into(), + digest: HELLOWORLD_BLOB_DIGEST.clone().into(), + size: HELLOWORLD_BLOB_CONTENTS.len() as u32, + executable: false, + }), + root_node, + ); + + // ensure the blob has been uploaded + assert!(blob_service.has(&HELLOWORLD_BLOB_DIGEST).await.unwrap()); +} + +#[cfg(target_family = "unix")] +#[tokio::test] +async fn complicated() { + let tmpdir = TempDir::new().unwrap(); + + // File ``.keep` + std::fs::write(tmpdir.path().join(".keep"), vec![]).unwrap(); + // Symlink `aa` + std::os::unix::fs::symlink("/nix/store/somewhereelse", tmpdir.path().join("aa")).unwrap(); + // Directory `keep` + std::fs::create_dir(tmpdir.path().join("keep")).unwrap(); + // File ``keep/.keep` + std::fs::write(tmpdir.path().join("keep").join(".keep"), vec![]).unwrap(); + + let blob_service = gen_blob_service(); + let directory_service = gen_directory_service(); + + let root_node = ingest_path( + blob_service.clone(), + directory_service.clone(), + tmpdir.path(), + ) + .await + .expect("must succeed"); + + // ensure root_node matched expectations + assert_eq!( + proto::node::Node::Directory(proto::DirectoryNode { + name: tmpdir + .path() + .file_name() + .unwrap() + .as_bytes() + .to_owned() + .into(), + digest: DIRECTORY_COMPLICATED.digest().into(), + size: DIRECTORY_COMPLICATED.size(), + }), + root_node, + ); + + // ensure DIRECTORY_WITH_KEEP and DIRECTORY_COMPLICATED have been uploaded + assert!(directory_service + .get(&DIRECTORY_WITH_KEEP.digest()) + .await + .unwrap() + .is_some()); + assert!(directory_service + .get(&DIRECTORY_COMPLICATED.digest()) + .await + .unwrap() + .is_some()); + + // ensure EMPTY_BLOB_CONTENTS has been uploaded + assert!(blob_service.has(&EMPTY_BLOB_DIGEST).await.unwrap()); +} diff --git a/tvix/castore/src/tests/mod.rs b/tvix/castore/src/tests/mod.rs new file mode 100644 index 000000000000..d016f3e0aa55 --- /dev/null +++ b/tvix/castore/src/tests/mod.rs @@ -0,0 +1 @@ +mod import; diff --git a/tvix/castore/src/utils.rs b/tvix/castore/src/utils.rs new file mode 100644 index 000000000000..660cb2f4b084 --- /dev/null +++ b/tvix/castore/src/utils.rs @@ -0,0 +1,19 @@ +//! A crate containing constructors to provide instances of a BlobService and +//! DirectoryService. +//! Only used for testing purposes, but across crates. +//! Should be removed once we have a better concept of a "Service registry". + +use std::sync::Arc; + +use crate::{ + blobservice::{BlobService, MemoryBlobService}, + directoryservice::{DirectoryService, MemoryDirectoryService}, +}; + +pub fn gen_blob_service() -> Arc { + Arc::new(MemoryBlobService::default()) +} + +pub fn gen_directory_service() -> Arc { + Arc::new(MemoryDirectoryService::default()) +} -- cgit 1.4.1