about summary refs log tree commit diff
path: root/tvix/castore-go/castore.go
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-10-17T10·53+0100
committerflokli <flokli@flokli.de>2023-10-17T19·51+0000
commite38733a955f6f6fc8962cf0e669d9cf4696bc14d (patch)
tree52dd4e750ab54870d782add53527f29fe5c2e3d2 /tvix/castore-go/castore.go
parent0325ae3ba328ac7b4215057d2c00ac467dd9d820 (diff)
chore(tvix): move castore golang bindings to tvix/castore-go r/6843
Have `tvix/castore/protos` only contain the protos, no go noise.

Make the `.pb.go` file generation a pure Nix build
at `//tvix/castore/protos:go-bindings`, and have a script at
`//tvix:castore-go-generate` (TBD) that copies the results to
`tvix/castore-go`.

`//tvix:castore-go`, with sources in `tvix/castore-go` now contains the
tooling around the generated bindings, and the generated bindings
themselves (So go mod replace workflows still work).

An additional CI step is added from there to ensure idempotenty of
the .pb.go files.

The code.tvl.fyi webserver config is updated to the new source code
path. I'm still unsure if we want to also update the go.mod name. While
being a backwards-incompatible change, it'll probbaly make it easier
where to find these files, and the amount of external consumers is still
low enough.

Part of b/323.

Change-Id: I2edadd118c22ec08e57c693f6cc2ef3261c62489
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9787
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore-go/castore.go')
-rw-r--r--tvix/castore-go/castore.go212
1 files changed, 212 insertions, 0 deletions
diff --git a/tvix/castore-go/castore.go b/tvix/castore-go/castore.go
new file mode 100644
index 000000000000..c9e3757885a0
--- /dev/null
+++ b/tvix/castore-go/castore.go
@@ -0,0 +1,212 @@
+package castorev1
+
+import (
+	"bytes"
+	"encoding/base64"
+	"fmt"
+
+	"google.golang.org/protobuf/proto"
+	"lukechampine.com/blake3"
+)
+
+// The size of a directory is calculated by summing up the numbers of
+// `directories`, `files` and `symlinks`, and for each directory, its size
+// field.
+func (d *Directory) Size() uint32 {
+	var size uint32
+	size = uint32(len(d.Files) + len(d.Symlinks))
+	for _, d := range d.Directories {
+		size += 1 + d.Size
+	}
+	return size
+}
+
+func (d *Directory) Digest() ([]byte, error) {
+	b, err := proto.MarshalOptions{
+		Deterministic: true,
+	}.Marshal(d)
+
+	if err != nil {
+		return nil, fmt.Errorf("error while marshalling directory: %w", err)
+	}
+
+	h := blake3.New(32, nil)
+
+	_, err = h.Write(b)
+	if err != nil {
+		return nil, fmt.Errorf("error writing to hasher: %w", err)
+	}
+
+	return h.Sum(nil), nil
+}
+
+// isValidName checks a name for validity.
+// We disallow slashes, null bytes, '.', '..' and the empty string.
+// Depending on the context, a *Node message with an empty string as name is
+// allowed, but they don't occur inside a Directory message.
+func isValidName(n []byte) bool {
+	if len(n) == 0 || bytes.Equal(n, []byte("..")) || bytes.Equal(n, []byte{'.'}) || bytes.Contains(n, []byte{'\x00'}) || bytes.Contains(n, []byte{'/'}) {
+		return false
+	}
+	return true
+}
+
+// Validate ensures a DirectoryNode has a valid name and correct digest len.
+func (n *DirectoryNode) Validate() error {
+	if len(n.Digest) != 32 {
+		return fmt.Errorf("invalid digest length for %s, expected %d, got %d", n.Name, 32, len(n.Digest))
+	}
+
+	if !isValidName(n.Name) {
+		return fmt.Errorf("invalid node name: %s", n.Name)
+	}
+
+	return nil
+}
+
+// Validate ensures a FileNode has a valid name and correct digest len.
+func (n *FileNode) Validate() error {
+	if len(n.Digest) != 32 {
+		return fmt.Errorf("invalid digest length for %s, expected %d, got %d", n.Name, 32, len(n.Digest))
+	}
+
+	if !isValidName(n.Name) {
+		return fmt.Errorf("invalid node name: %s", n.Name)
+	}
+
+	return nil
+}
+
+// Validate ensures a SymlinkNode has a valid name and target.
+func (n *SymlinkNode) Validate() error {
+	if len(n.Target) == 0 || bytes.Contains(n.Target, []byte{0}) {
+		return fmt.Errorf("invalid symlink target: %s", n.Target)
+	}
+
+	if !isValidName(n.Name) {
+		return fmt.Errorf("invalid node name: %s", n.Name)
+	}
+
+	return nil
+}
+
+// Validate ensures a node is valid, by dispatching to the per-type validation functions.
+func (n *Node) Validate() error {
+	if node := n.GetDirectory(); node != nil {
+		if err := node.Validate(); err != nil {
+			return fmt.Errorf("SymlinkNode failed validation: %w", err)
+		}
+	} else if node := n.GetFile(); node != nil {
+		if err := node.Validate(); err != nil {
+			return fmt.Errorf("FileNode failed validation: %w", err)
+		}
+	} else if node := n.GetSymlink(); node != nil {
+		if err := node.Validate(); err != nil {
+			return fmt.Errorf("SymlinkNode failed validation: %w", err)
+		}
+
+	} else {
+		// this would only happen if we introduced a new type
+		return fmt.Errorf("no specific node found")
+	}
+
+	return nil
+}
+
+// Validate thecks the Directory message for invalid data, such as:
+// - violations of name restrictions
+// - invalid digest lengths
+// - not properly sorted lists
+// - duplicate names in the three lists
+func (d *Directory) Validate() error {
+	// seenNames contains all seen names so far.
+	// We populate this to ensure node names are unique across all three lists.
+	seenNames := make(map[string]interface{})
+
+	// We also track the last seen name in each of the three lists,
+	// to ensure nodes are sorted by their names.
+	var lastDirectoryName, lastFileName, lastSymlinkName []byte
+
+	// helper function to only insert in sorted order.
+	// used with the three lists above.
+	// Note this consumes a *pointer to* a string,  as it mutates it.
+	insertIfGt := func(lastName *[]byte, name []byte) error {
+		// update if it's greater than the previous name
+		if bytes.Compare(name, *lastName) == 1 {
+			*lastName = name
+			return nil
+		} else {
+			return fmt.Errorf("%v is not in sorted order", name)
+		}
+	}
+
+	// insertOnce inserts into seenNames if the key doesn't exist yet.
+	insertOnce := func(name []byte) error {
+		encoded := base64.StdEncoding.EncodeToString(name)
+		if _, found := seenNames[encoded]; found {
+			return fmt.Errorf("duplicate name: %v", string(name))
+		}
+		seenNames[encoded] = nil
+		return nil
+	}
+
+	// Loop over all Directories, Files and Symlinks individually,
+	// check them for validity, then check for sorting in the current list, and
+	// uniqueness across all three lists.
+	for _, directoryNode := range d.Directories {
+		directoryName := directoryNode.GetName()
+
+		if err := directoryNode.Validate(); err != nil {
+			return fmt.Errorf("DirectoryNode %s failed validation: %w", directoryName, err)
+		}
+
+		// ensure names are sorted
+		if err := insertIfGt(&lastDirectoryName, directoryName); err != nil {
+			return err
+		}
+
+		// add to seenNames
+		if err := insertOnce(directoryName); err != nil {
+			return err
+		}
+
+	}
+
+	for _, fileNode := range d.Files {
+		fileName := fileNode.GetName()
+
+		if err := fileNode.Validate(); err != nil {
+			return fmt.Errorf("FileNode %s failed validation: %w", fileName, err)
+		}
+
+		// ensure names are sorted
+		if err := insertIfGt(&lastFileName, fileName); err != nil {
+			return err
+		}
+
+		// add to seenNames
+		if err := insertOnce(fileName); err != nil {
+			return err
+		}
+	}
+
+	for _, symlinkNode := range d.Symlinks {
+		symlinkName := symlinkNode.GetName()
+
+		if err := symlinkNode.Validate(); err != nil {
+			return fmt.Errorf("SymlinkNode %s failed validation: %w", symlinkName, err)
+		}
+
+		// ensure names are sorted
+		if err := insertIfGt(&lastSymlinkName, symlinkName); err != nil {
+			return err
+		}
+
+		// add to seenNames
+		if err := insertOnce(symlinkName); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}