diff options
author | Florian Klink <flokli@flokli.de> | 2023-09-21T19·32+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2023-09-22T12·51+0000 |
commit | 32f41458c0a0f62bf906021ef096c465ccc45581 (patch) | |
tree | 3aaab8c453871f39c46fb43f8278aa933b24519d /tvix/castore/protos/castore.go | |
parent | d8ef0cfb4a859af7e33828b013356412d02532da (diff) |
refactor(tvix): move castore into tvix-castore crate r/6629
This splits the pure content-addressed layers from tvix-store into a `castore` crate, and only leaves PathInfo related things, as well as the CLI entrypoint in the tvix-store crate.

Notable changes:
- `fixtures` and `utils` had to be moved out of the `test` cfg, so they can be imported from tvix-store.
- Some ad-hoc fixtures in the test were moved to proper fixtures in the same step.
- The protos are now created by a (more static) recipe in the protos/ directory.

The (now two) golang targets are commented out, as it's not possible to update them properly in the same CL. This will be done by a followup CL once this is merged (and whitby deployed).

Bug: https://b.tvl.fyi/issues/301
Change-Id: I8d675d4bf1fb697eb7d479747c1b1e3635718107
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9370
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Diffstat (limited to 'tvix/castore/protos/castore.go')
-rw-r--r-- | tvix/castore/protos/castore.go | 164 |
1 files changed, 164 insertions, 0 deletions
diff --git a/tvix/castore/protos/castore.go b/tvix/castore/protos/castore.go new file mode 100644 index 000000000000..102ba4bff75d --- /dev/null +++ b/tvix/castore/protos/castore.go @@ -0,0 +1,164 @@ +package castorev1 + +import ( + "bytes" + "encoding/base64" + "fmt" + "google.golang.org/protobuf/proto" + "lukechampine.com/blake3" +) + +// The size of a directory is calculated by summing up the numbers of +// `directories`, `files` and `symlinks`, and for each directory, its size +// field. +func (d *Directory) Size() uint32 { + var size uint32 + size = uint32(len(d.Files) + len(d.Symlinks)) + for _, d := range d.Directories { + size += 1 + d.Size + } + return size +} + +func (d *Directory) Digest() ([]byte, error) { + b, err := proto.MarshalOptions{ + Deterministic: true, + }.Marshal(d) + + if err != nil { + return nil, fmt.Errorf("error while marshalling directory: %w", err) + } + + h := blake3.New(32, nil) + + _, err = h.Write(b) + if err != nil { + return nil, fmt.Errorf("error writing to hasher: %w", err) + } + + return h.Sum(nil), nil +} + +// isValidName checks a name for validity. +// We disallow slashes, null bytes, '.', '..' and the empty string. +// Depending on the context, a *Node message with an empty string as name is +// allowed, but they don't occur inside a Directory message. +func isValidName(n []byte) bool { + if len(n) == 0 || bytes.Equal(n, []byte("..")) || bytes.Equal(n, []byte{'.'}) || bytes.Contains(n, []byte{'\x00'}) || bytes.Contains(n, []byte{'/'}) { + return false + } + return true +} + +// Validate thecks the Directory message for invalid data, such as: +// - violations of name restrictions +// - invalid digest lengths +// - not properly sorted lists +// - duplicate names in the three lists +func (d *Directory) Validate() error { + // seenNames contains all seen names so far. + // We populate this to ensure node names are unique across all three lists. 
+ seenNames := make(map[string]interface{}) + + // We also track the last seen name in each of the three lists, + // to ensure nodes are sorted by their names. + var lastDirectoryName, lastFileName, lastSymlinkName []byte + + // helper function to only insert in sorted order. + // used with the three lists above. + // Note this consumes a *pointer to* a string, as it mutates it. + insertIfGt := func(lastName *[]byte, name []byte) error { + // update if it's greater than the previous name + if bytes.Compare(name, *lastName) == 1 { + *lastName = name + return nil + } else { + return fmt.Errorf("%v is not in sorted order", name) + } + } + + // insertOnce inserts into seenNames if the key doesn't exist yet. + insertOnce := func(name []byte) error { + encoded := base64.StdEncoding.EncodeToString(name) + if _, found := seenNames[encoded]; found { + return fmt.Errorf("duplicate name: %v", string(name)) + } + seenNames[encoded] = nil + return nil + } + + // Loop over all Directories, Files and Symlinks individually. + // Check the name for validity, check a potential digest for length, + // then check for sorting in the current list, and uniqueness across all three lists. 
+ for _, directoryNode := range d.Directories { + directoryName := directoryNode.GetName() + + // check name for validity + if !isValidName(directoryName) { + return fmt.Errorf("invalid name for DirectoryNode: %v", directoryName) + } + + // check digest to be 32 bytes + digestLen := len(directoryNode.GetDigest()) + if digestLen != 32 { + return fmt.Errorf("invalid digest length for DirectoryNode: %d", digestLen) + } + + // ensure names are sorted + if err := insertIfGt(&lastDirectoryName, directoryName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(directoryName); err != nil { + return err + } + + } + + for _, fileNode := range d.Files { + fileName := fileNode.GetName() + + // check name for validity + if !isValidName(fileName) { + return fmt.Errorf("invalid name for FileNode: %v", fileName) + } + + // check digest to be 32 bytes + digestLen := len(fileNode.GetDigest()) + if digestLen != 32 { + return fmt.Errorf("invalid digest length for FileNode: %d", digestLen) + } + + // ensure names are sorted + if err := insertIfGt(&lastFileName, fileName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(fileName); err != nil { + return err + } + } + + for _, symlinkNode := range d.Symlinks { + symlinkName := symlinkNode.GetName() + + // check name for validity + if !isValidName(symlinkName) { + return fmt.Errorf("invalid name for SymlinkNode: %v", symlinkName) + } + + // ensure names are sorted + if err := insertIfGt(&lastSymlinkName, symlinkName); err != nil { + return err + } + + // add to seenNames + if err := insertOnce(symlinkName); err != nil { + return err + } + } + + return nil +} |