about summary refs log tree commit diff
path: root/tvix/store
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-10-10T18·04+0200
committerclbot <clbot@tvl.fyi>2023-10-10T18·55+0000
commite6ba84ea50217c83d0aafdbbe7c5a659ba3e6586 (patch)
treefa84598af1ad6fe2e45cf72cc99209a46191b1a2 /tvix/store
parent6fe34b7ba03c5e2406a0659da651134e675a55d6 (diff)
feat(tvix/store/protos): add Export r/6772
Export will traverse a given PathInfo structure, and write the contents
in NAR format to the passed Writer.
It uses directoryLookupFn and blobLookupFn to resolve references.

This is being moved over from nar-bridge. We need to keep the code there
around until we can bump go.mod to storev1 with this merged, but the
tests can already be moved entirely.

Change-Id: Ie0de3077b09344cafa00ff1e2ddb8b52e9e631bc
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9602
Tested-by: BuildkiteCI
Reviewed-by: Brian McGee <brian@bmcgee.ie>
Autosubmit: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix/store')
-rw-r--r--tvix/store/protos/export.go275
-rw-r--r--tvix/store/protos/export_test.go151
-rw-r--r--tvix/store/protos/go.mod4
-rw-r--r--tvix/store/protos/go.sum2
-rw-r--r--tvix/store/protos/pick_next_node_test.go51
-rw-r--r--tvix/store/protos/testdata/emptydirectory.narbin0 -> 96 bytes
-rw-r--r--tvix/store/protos/testdata/onebyteregular.narbin0 -> 120 bytes
-rw-r--r--tvix/store/protos/testdata/symlink.narbin0 -> 136 bytes
8 files changed, 482 insertions, 1 deletions
diff --git a/tvix/store/protos/export.go b/tvix/store/protos/export.go
new file mode 100644
index 0000000000..8c4a275229
--- /dev/null
+++ b/tvix/store/protos/export.go
@@ -0,0 +1,275 @@
+package storev1
+
+import (
+	"fmt"
+	"io"
+	"path"
+
+	castorev1pb "code.tvl.fyi/tvix/castore/protos"
+	"github.com/nix-community/go-nix/pkg/nar"
+)
+
+type DirectoryLookupFn func([]byte) (*castorev1pb.Directory, error)
+type BlobLookupFn func([]byte) (io.ReadCloser, error)
+
+// Export will traverse a given PathInfo structure, and write the contents
+// in NAR format to the passed Writer.
+// It uses directoryLookupFn and blobLookupFn to resolve references.
+func Export(
+	w io.Writer,
+	pathInfo *PathInfo,
+	directoryLookupFn DirectoryLookupFn,
+	blobLookupFn BlobLookupFn,
+) error {
+	// initialize a NAR writer
+	narWriter, err := nar.NewWriter(w)
+	if err != nil {
+		return fmt.Errorf("unable to initialize nar writer: %w", err)
+	}
+	defer narWriter.Close()
+
+	// populate rootHeader
+	rootHeader := &nar.Header{
+		Path: "/",
+	}
+
+	// populate a stack
+	// we will push paths and directories to it when entering a directory,
+	// and emit individual elements to the NAR writer, draining the Directory object.
+	// once it's empty, we can pop it off the stack.
+	var stackPaths = []string{}
+	var stackDirectories = []*castorev1pb.Directory{}
+
+	// peek at the pathInfo root and assemble the root node and write to writer
+	// in the case of a regular file, we retrieve and write the contents, close and exit
+	// in the case of a symlink, we write the symlink, close and exit
+	switch v := (pathInfo.GetNode().GetNode()).(type) {
+	case *castorev1pb.Node_File:
+		rootHeader.Type = nar.TypeRegular
+		rootHeader.Size = int64(v.File.GetSize())
+		rootHeader.Executable = v.File.GetExecutable()
+		err := narWriter.WriteHeader(rootHeader)
+		if err != nil {
+			return fmt.Errorf("unable to write root header: %w", err)
+		}
+
+		// if it's a regular file, retrieve and write the contents
+		blobReader, err := blobLookupFn(v.File.GetDigest())
+		if err != nil {
+			return fmt.Errorf("unable to lookup blob: %w", err)
+		}
+		defer blobReader.Close()
+
+		_, err = io.Copy(narWriter, blobReader)
+		if err != nil {
+			return fmt.Errorf("unable to read from blobReader: %w", err)
+		}
+
+		err = blobReader.Close()
+		if err != nil {
+			return fmt.Errorf("unable to close content reader: %w", err)
+		}
+
+		err = narWriter.Close()
+		if err != nil {
+			return fmt.Errorf("unable to close nar reader: %w", err)
+		}
+
+		return nil
+
+	case *castorev1pb.Node_Symlink:
+		rootHeader.Type = nar.TypeSymlink
+		rootHeader.LinkTarget = string(v.Symlink.GetTarget())
+		err := narWriter.WriteHeader(rootHeader)
+		if err != nil {
+			return fmt.Errorf("unable to write root header: %w", err)
+		}
+
+		err = narWriter.Close()
+		if err != nil {
+			return fmt.Errorf("unable to close nar reader: %w", err)
+		}
+
+		return nil
+	case *castorev1pb.Node_Directory:
+		// We have a directory at the root, look it up and put in on the stack.
+		directory, err := directoryLookupFn(v.Directory.Digest)
+		if err != nil {
+			return fmt.Errorf("unable to lookup directory: %w", err)
+		}
+		stackDirectories = append(stackDirectories, directory)
+		stackPaths = append(stackPaths, "/")
+
+		err = narWriter.WriteHeader(&nar.Header{
+			Path: "/",
+			Type: nar.TypeDirectory,
+		})
+
+		if err != nil {
+			return fmt.Errorf("error writing header: %w", err)
+		}
+	}
+
+	// as long as the stack is not empty, we keep running.
+	for {
+		if len(stackDirectories) == 0 {
+			return nil
+		}
+
+		// Peek at the current top of the stack.
+		topOfStack := stackDirectories[len(stackDirectories)-1]
+		topOfStackPath := stackPaths[len(stackPaths)-1]
+
+		// get the next element that's lexicographically smallest, and drain it from
+		// the current directory on top of the stack.
+		nextNode := drainNextNode(topOfStack)
+
+		// If nextNode returns nil, there's nothing left in the directory node, so we
+		// can emit it from the stack.
+		// Contrary to the import case, we don't emit the node popping from the stack, but when pushing.
+		if nextNode == nil {
+			// pop off stack
+			stackDirectories = stackDirectories[:len(stackDirectories)-1]
+			stackPaths = stackPaths[:len(stackPaths)-1]
+
+			continue
+		}
+
+		switch n := (nextNode).(type) {
+		case *castorev1pb.DirectoryNode:
+			err := narWriter.WriteHeader(&nar.Header{
+				Path: path.Join(topOfStackPath, string(n.GetName())),
+				Type: nar.TypeDirectory,
+			})
+			if err != nil {
+				return fmt.Errorf("unable to write nar header: %w", err)
+			}
+
+			d, err := directoryLookupFn(n.GetDigest())
+			if err != nil {
+				return fmt.Errorf("unable to lookup directory: %w", err)
+			}
+
+			// add to stack
+			stackDirectories = append(stackDirectories, d)
+			stackPaths = append(stackPaths, path.Join(topOfStackPath, string(n.GetName())))
+		case *castorev1pb.FileNode:
+			err := narWriter.WriteHeader(&nar.Header{
+				Path:       path.Join(topOfStackPath, string(n.GetName())),
+				Type:       nar.TypeRegular,
+				Size:       int64(n.GetSize()),
+				Executable: n.GetExecutable(),
+			})
+			if err != nil {
+				return fmt.Errorf("unable to write nar header: %w", err)
+			}
+
+			// copy file contents
+			contentReader, err := blobLookupFn(n.GetDigest())
+			if err != nil {
+				return fmt.Errorf("unable to get blob: %w", err)
+			}
+			defer contentReader.Close()
+
+			_, err = io.Copy(narWriter, contentReader)
+			if err != nil {
+				return fmt.Errorf("unable to copy contents from contentReader: %w", err)
+			}
+
+			err = contentReader.Close()
+			if err != nil {
+				return fmt.Errorf("unable to close content reader: %w", err)
+			}
+		case *castorev1pb.SymlinkNode:
+			err := narWriter.WriteHeader(&nar.Header{
+				Path:       path.Join(topOfStackPath, string(n.GetName())),
+				Type:       nar.TypeSymlink,
+				LinkTarget: string(n.GetTarget()),
+			})
+			if err != nil {
+				return fmt.Errorf("unable to write nar header: %w", err)
+			}
+		}
+	}
+}
+
+// drainNextNode will drain a directory message with one of its child nodes,
+// whichever comes first alphabetically.
+func drainNextNode(d *castorev1pb.Directory) interface{} {
+	switch v := (smallestNode(d)).(type) {
+	case *castorev1pb.DirectoryNode:
+		d.Directories = d.Directories[1:]
+		return v
+	case *castorev1pb.FileNode:
+		d.Files = d.Files[1:]
+		return v
+	case *castorev1pb.SymlinkNode:
+		d.Symlinks = d.Symlinks[1:]
+		return v
+	case nil:
+		return nil
+	default:
+		panic("invalid type encountered")
+	}
+}
+
+// smallestNode will return the node from a directory message,
+// whichever comes first alphabetically.
+func smallestNode(d *castorev1pb.Directory) interface{} {
+	childDirectories := d.GetDirectories()
+	childFiles := d.GetFiles()
+	childSymlinks := d.GetSymlinks()
+
+	if len(childDirectories) > 0 {
+		if len(childFiles) > 0 {
+			if len(childSymlinks) > 0 {
+				// directories,files,symlinks
+				return smallerNode(smallerNode(childDirectories[0], childFiles[0]), childSymlinks[0])
+			} else {
+				// directories,files,!symlinks
+				return smallerNode(childDirectories[0], childFiles[0])
+			}
+		} else {
+			// directories,!files
+			if len(childSymlinks) > 0 {
+				// directories,!files,symlinks
+				return smallerNode(childDirectories[0], childSymlinks[0])
+			} else {
+				// directories,!files,!symlinks
+				return childDirectories[0]
+			}
+		}
+	} else {
+		// !directories
+		if len(childFiles) > 0 {
+			// !directories,files
+			if len(childSymlinks) > 0 {
+				// !directories,files,symlinks
+				return smallerNode(childFiles[0], childSymlinks[0])
+			} else {
+				// !directories,files,!symlinks
+				return childFiles[0]
+			}
+		} else {
+			//!directories,!files
+			if len(childSymlinks) > 0 {
+				//!directories,!files,symlinks
+				return childSymlinks[0]
+			} else {
+				//!directories,!files,!symlinks
+				return nil
+			}
+		}
+	}
+}
+
+// smallerNode compares two nodes by their name,
+// and returns the one with the smaller name.
+// both nodes may not be nil, we do check for these cases in smallestNode.
+func smallerNode(a interface{ GetName() []byte }, b interface{ GetName() []byte }) interface{ GetName() []byte } {
+	if string(a.GetName()) < string(b.GetName()) {
+		return a
+	} else {
+		return b
+	}
+}
diff --git a/tvix/store/protos/export_test.go b/tvix/store/protos/export_test.go
new file mode 100644
index 0000000000..d45dd1ee7c
--- /dev/null
+++ b/tvix/store/protos/export_test.go
@@ -0,0 +1,151 @@
+package storev1_test
+
+import (
+	"bytes"
+	"io"
+	"os"
+	"testing"
+
+	castorev1pb "code.tvl.fyi/tvix/castore/protos"
+	storev1pb "code.tvl.fyi/tvix/store/protos"
+	"github.com/stretchr/testify/require"
+	"lukechampine.com/blake3"
+)
+
+func mustDirectoryDigest(d *castorev1pb.Directory) []byte {
+	dgst, err := d.Digest()
+	if err != nil {
+		panic(err)
+	}
+	return dgst
+}
+
+func mustBlobDigest(r io.Reader) []byte {
+	hasher := blake3.New(32, nil)
+	_, err := io.Copy(hasher, r)
+	if err != nil {
+		panic(err)
+	}
+	return hasher.Sum([]byte{})
+}
+
+func TestSymlink(t *testing.T) {
+	pathInfo := &storev1pb.PathInfo{
+
+		Node: &castorev1pb.Node{
+			Node: &castorev1pb.Node_Symlink{
+				Symlink: &castorev1pb.SymlinkNode{
+					Name:   []byte("doesntmatter"),
+					Target: []byte("/nix/store/somewhereelse"),
+				},
+			},
+		},
+	}
+
+	var buf bytes.Buffer
+
+	err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
+		panic("no directories expected")
+	}, func([]byte) (io.ReadCloser, error) {
+		panic("no files expected")
+	})
+	require.NoError(t, err, "exporter shouldn't fail")
+
+	f, err := os.Open("testdata/symlink.nar")
+	require.NoError(t, err)
+
+	bytesExpected, err := io.ReadAll(f)
+	if err != nil {
+		panic(err)
+	}
+
+	require.Equal(t, bytesExpected, buf.Bytes(), "expected nar contents to match")
+}
+
+func TestRegular(t *testing.T) {
+	// The blake3 digest of the 0x01 byte.
+	BLAKE3_DIGEST_0X01 := []byte{
+		0x48, 0xfc, 0x72, 0x1f, 0xbb, 0xc1, 0x72, 0xe0, 0x92, 0x5f, 0xa2, 0x7a, 0xf1, 0x67, 0x1d,
+		0xe2, 0x25, 0xba, 0x92, 0x71, 0x34, 0x80, 0x29, 0x98, 0xb1, 0x0a, 0x15, 0x68, 0xa1, 0x88,
+		0x65, 0x2b,
+	}
+
+	pathInfo := &storev1pb.PathInfo{
+		Node: &castorev1pb.Node{
+			Node: &castorev1pb.Node_File{
+				File: &castorev1pb.FileNode{
+					Name:       []byte("doesntmatter"),
+					Digest:     BLAKE3_DIGEST_0X01,
+					Size:       1,
+					Executable: false,
+				},
+			},
+		},
+	}
+
+	var buf bytes.Buffer
+
+	err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) {
+		panic("no directories expected")
+	}, func(blobRef []byte) (io.ReadCloser, error) {
+		if !bytes.Equal(blobRef, BLAKE3_DIGEST_0X01) {
+			panic("unexpected blobref")
+		}
+		return io.NopCloser(bytes.NewBuffer([]byte{0x01})), nil
+	})
+	require.NoError(t, err, "exporter shouldn't fail")
+
+	f, err := os.Open("testdata/onebyteregular.nar")
+	require.NoError(t, err)
+
+	bytesExpected, err := io.ReadAll(f)
+	if err != nil {
+		panic(err)
+	}
+
+	require.Equal(t, bytesExpected, buf.Bytes(), "expected nar contents to match")
+}
+
+func TestEmptyDirectory(t *testing.T) {
+	// construct empty directory node this refers to
+	emptyDirectory := &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files:       []*castorev1pb.FileNode{},
+		Symlinks:    []*castorev1pb.SymlinkNode{},
+	}
+	emptyDirectoryDigest := mustDirectoryDigest(emptyDirectory)
+
+	pathInfo := &storev1pb.PathInfo{
+		Node: &castorev1pb.Node{
+			Node: &castorev1pb.Node_Directory{
+				Directory: &castorev1pb.DirectoryNode{
+					Name:   []byte("doesntmatter"),
+					Digest: emptyDirectoryDigest,
+					Size:   0,
+				},
+			},
+		},
+	}
+
+	var buf bytes.Buffer
+
+	err := storev1pb.Export(&buf, pathInfo, func(directoryRef []byte) (*castorev1pb.Directory, error) {
+		if !bytes.Equal(directoryRef, emptyDirectoryDigest) {
+			panic("unexpected directoryRef")
+		}
+		return emptyDirectory, nil
+	}, func([]byte) (io.ReadCloser, error) {
+		panic("no files expected")
+	})
+	require.NoError(t, err, "exporter shouldn't fail")
+
+	f, err := os.Open("testdata/emptydirectory.nar")
+	require.NoError(t, err)
+
+	bytesExpected, err := io.ReadAll(f)
+	if err != nil {
+		panic(err)
+	}
+
+	require.Equal(t, bytesExpected, buf.Bytes(), "expected nar contents to match")
+}
diff --git a/tvix/store/protos/go.mod b/tvix/store/protos/go.mod
index fe3f7eff3e..34cc9c07ac 100644
--- a/tvix/store/protos/go.mod
+++ b/tvix/store/protos/go.mod
@@ -4,10 +4,12 @@ go 1.19
 
 require (
 	code.tvl.fyi/tvix/castore/protos v0.0.0-20230922125121-72355662d742
+	github.com/google/go-cmp v0.5.6
 	github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b
 	github.com/stretchr/testify v1.8.1
 	google.golang.org/grpc v1.51.0
 	google.golang.org/protobuf v1.31.0
+	lukechampine.com/blake3 v1.1.7
 )
 
 require (
@@ -18,7 +20,7 @@ require (
 	golang.org/x/net v0.7.0 // indirect
 	golang.org/x/sys v0.5.0 // indirect
 	golang.org/x/text v0.7.0 // indirect
+	golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
 	google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	lukechampine.com/blake3 v1.1.7 // indirect
 )
diff --git a/tvix/store/protos/go.sum b/tvix/store/protos/go.sum
index 55bfb88e7b..dd7d9bf045 100644
--- a/tvix/store/protos/go.sum
+++ b/tvix/store/protos/go.sum
@@ -28,6 +28,7 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw
 github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
+github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
 github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
 github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
 github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b h1:AWEKOdDO3JnHApQDOmONEKLXbMCQJhYJJfJpiWB9VGI=
@@ -70,6 +71,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3
 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
 google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
diff --git a/tvix/store/protos/pick_next_node_test.go b/tvix/store/protos/pick_next_node_test.go
new file mode 100644
index 0000000000..830c6cacc1
--- /dev/null
+++ b/tvix/store/protos/pick_next_node_test.go
@@ -0,0 +1,51 @@
+package storev1
+
+import (
+	"testing"
+
+	castorev1pb "code.tvl.fyi/tvix/castore/protos"
+	"github.com/google/go-cmp/cmp"
+	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/testing/protocmp"
+)
+
+func requireProtoEq(t *testing.T, expected interface{}, actual interface{}) {
+	if diff := cmp.Diff(expected, actual, protocmp.Transform()); diff != "" {
+		t.Errorf("unexpected difference:\n%v", diff)
+	}
+}
+
+func TestPopNextNode(t *testing.T) {
+	t.Run("empty directory", func(t *testing.T) {
+		d := &castorev1pb.Directory{
+			Directories: []*castorev1pb.DirectoryNode{},
+			Files:       []*castorev1pb.FileNode{},
+			Symlinks:    []*castorev1pb.SymlinkNode{},
+		}
+
+		n := drainNextNode(d)
+		require.Equal(t, nil, n)
+	})
+	t.Run("only directories", func(t *testing.T) {
+		ds := &castorev1pb.Directory{
+			Directories: []*castorev1pb.DirectoryNode{{
+				Name:   []byte("a"),
+				Digest: []byte{},
+				Size:   0,
+			}, {
+				Name:   []byte("b"),
+				Digest: []byte{},
+				Size:   0,
+			}},
+			Files:    []*castorev1pb.FileNode{},
+			Symlinks: []*castorev1pb.SymlinkNode{},
+		}
+
+		n := drainNextNode(ds)
+		requireProtoEq(t, &castorev1pb.DirectoryNode{
+			Name:   []byte("a"),
+			Digest: []byte{},
+			Size:   0,
+		}, n)
+	})
+}
diff --git a/tvix/store/protos/testdata/emptydirectory.nar b/tvix/store/protos/testdata/emptydirectory.nar
new file mode 100644
index 0000000000..baba558622
--- /dev/null
+++ b/tvix/store/protos/testdata/emptydirectory.nar
Binary files differdiff --git a/tvix/store/protos/testdata/onebyteregular.nar b/tvix/store/protos/testdata/onebyteregular.nar
new file mode 100644
index 0000000000..b8c94932bf
--- /dev/null
+++ b/tvix/store/protos/testdata/onebyteregular.nar
Binary files differdiff --git a/tvix/store/protos/testdata/symlink.nar b/tvix/store/protos/testdata/symlink.nar
new file mode 100644
index 0000000000..7990e4ad5b
--- /dev/null
+++ b/tvix/store/protos/testdata/symlink.nar
Binary files differ