diff options
-rw-r--r-- | tvix/default.nix | 2 | ||||
-rw-r--r-- | tvix/nar-bridge/pkg/exporter/full_test.go | 98 | ||||
-rw-r--r-- | tvix/store/protos/export.go | 275 | ||||
-rw-r--r-- | tvix/store/protos/export_test.go (renamed from tvix/nar-bridge/pkg/exporter/export_test.go) | 83 | ||||
-rw-r--r-- | tvix/store/protos/go.mod | 4 | ||||
-rw-r--r-- | tvix/store/protos/go.sum | 2 | ||||
-rw-r--r-- | tvix/store/protos/pick_next_node_test.go (renamed from tvix/nar-bridge/pkg/exporter/pick_next_node_test.go) | 2 | ||||
-rw-r--r-- | tvix/store/protos/testdata/emptydirectory.nar | bin | 0 -> 96 bytes | |||
-rw-r--r-- | tvix/store/protos/testdata/onebyteregular.nar | bin | 0 -> 120 bytes | |||
-rw-r--r-- | tvix/store/protos/testdata/symlink.nar | bin | 0 -> 136 bytes |
10 files changed, 387 insertions, 79 deletions
diff --git a/tvix/default.nix b/tvix/default.nix index b3453d774aea..5fb7642b8755 100644 --- a/tvix/default.nix +++ b/tvix/default.nix @@ -108,7 +108,7 @@ in store-protos-go = pkgs.buildGoModule { name = "store-golang"; src = depot.third_party.gitignoreSource ./store/protos; - vendorHash = "sha256-619ICDpXuDRHRL5XtPlbUoik8yrTDSxoQiVrhsK7UlQ="; + vendorHash = "sha256-qPtEQTd1Vol8vhE10AdwTleTLfYS7xaOir3Ti4MJ+Vc="; }; # Build the Rust documentation for publishing on docs.tvix.dev. diff --git a/tvix/nar-bridge/pkg/exporter/full_test.go b/tvix/nar-bridge/pkg/exporter/full_test.go new file mode 100644 index 000000000000..4875c08e2133 --- /dev/null +++ b/tvix/nar-bridge/pkg/exporter/full_test.go @@ -0,0 +1,98 @@ +package exporter_test + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + "io" + "os" + "testing" + + castorev1pb "code.tvl.fyi/tvix/castore/protos" + "code.tvl.fyi/tvix/nar-bridge/pkg/exporter" + "code.tvl.fyi/tvix/nar-bridge/pkg/importer" + "github.com/stretchr/testify/require" + "lukechampine.com/blake3" +) + +func TestFull(t *testing.T) { + // We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to the exporter, + // and store all the file contents and directory objects received in two hashmaps. + // We then feed it to the writer, and test we come up with the same NAR file. + + f, err := os.Open("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar") + require.NoError(t, err) + + narContents, err := io.ReadAll(f) + require.NoError(t, err) + + blobsMap := make(map[string][]byte, 0) + directoriesMap := make(map[string]*castorev1pb.Directory) + + pathInfo, err := importer.Import( + context.Background(), + bytes.NewBuffer(narContents), + func(blobReader io.Reader) ([]byte, error) { + // read in contents, we need to put it into filesMap later. + contents, err := io.ReadAll(blobReader) + require.NoError(t, err) + + dgst := mustBlobDigest(bytes.NewReader(contents)) + + // put it in filesMap + blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents + + return dgst, nil + }, + func(directory *castorev1pb.Directory) ([]byte, error) { + dgst := mustDirectoryDigest(directory) + + directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory + return dgst, nil + }, + ) + + require.NoError(t, err) + + // done populating everything, now actually test the export :-) + var buf bytes.Buffer + err = exporter.Export( + &buf, + pathInfo, + func(directoryDgst []byte) (*castorev1pb.Directory, error) { + d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)] + if !found { + panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst))) + } + return d, nil + }, + func(blobDgst []byte) (io.ReadCloser, error) { + blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)] + if !found { + panic(fmt.Sprintf("blob %v not found", base64.StdEncoding.EncodeToString(blobDgst))) + } + return io.NopCloser(bytes.NewReader(blobContents)), nil + }, + ) + + require.NoError(t, err, "exporter shouldn't fail") + require.Equal(t, narContents, buf.Bytes()) +} + +func mustDirectoryDigest(d *castorev1pb.Directory) []byte { + dgst, err := d.Digest() + if err != nil { + panic(err) + } + return dgst +} + +func mustBlobDigest(r io.Reader) []byte { + hasher := blake3.New(32, nil) + _, err := io.Copy(hasher, r) + if err != nil { + panic(err) + } + return hasher.Sum([]byte{}) +} diff --git a/tvix/store/protos/export.go b/tvix/store/protos/export.go new file mode 100644 index 000000000000..8c4a27522977 --- /dev/null +++ b/tvix/store/protos/export.go @@ -0,0 +1,275 @@ +package storev1 + +import ( + "fmt" + "io" + "path" + + castorev1pb "code.tvl.fyi/tvix/castore/protos" + "github.com/nix-community/go-nix/pkg/nar" +) + +type DirectoryLookupFn func([]byte) (*castorev1pb.Directory, error) +type BlobLookupFn func([]byte) (io.ReadCloser, error) + +// Export will traverse a given PathInfo structure, and write the contents +// in NAR format to the passed Writer. +// It uses directoryLookupFn and blobLookupFn to resolve references. +func Export( + w io.Writer, + pathInfo *PathInfo, + directoryLookupFn DirectoryLookupFn, + blobLookupFn BlobLookupFn, +) error { + // initialize a NAR writer + narWriter, err := nar.NewWriter(w) + if err != nil { + return fmt.Errorf("unable to initialize nar writer: %w", err) + } + defer narWriter.Close() + + // populate rootHeader + rootHeader := &nar.Header{ + Path: "/", + } + + // populate a stack + // we will push paths and directories to it when entering a directory, + // and emit individual elements to the NAR writer, draining the Directory object. + // once it's empty, we can pop it off the stack. + var stackPaths = []string{} + var stackDirectories = []*castorev1pb.Directory{} + + // peek at the pathInfo root and assemble the root node and write to writer + // in the case of a regular file, we retrieve and write the contents, close and exit + // in the case of a symlink, we write the symlink, close and exit + switch v := (pathInfo.GetNode().GetNode()).(type) { + case *castorev1pb.Node_File: + rootHeader.Type = nar.TypeRegular + rootHeader.Size = int64(v.File.GetSize()) + rootHeader.Executable = v.File.GetExecutable() + err := narWriter.WriteHeader(rootHeader) + if err != nil { + return fmt.Errorf("unable to write root header: %w", err) + } + + // if it's a regular file, retrieve and write the contents + blobReader, err := blobLookupFn(v.File.GetDigest()) + if err != nil { + return fmt.Errorf("unable to lookup blob: %w", err) + } + defer blobReader.Close() + + _, err = io.Copy(narWriter, blobReader) + if err != nil { + return fmt.Errorf("unable to read from blobReader: %w", err) + } + + err = blobReader.Close() + if err != nil { + return fmt.Errorf("unable to close content reader: %w", err) + } + + err = narWriter.Close() + if err != nil { + return fmt.Errorf("unable to close nar reader: %w", err) + } + + return nil + + case *castorev1pb.Node_Symlink: + rootHeader.Type = nar.TypeSymlink + rootHeader.LinkTarget = string(v.Symlink.GetTarget()) + err := narWriter.WriteHeader(rootHeader) + if err != nil { + return fmt.Errorf("unable to write root header: %w", err) + } + + err = narWriter.Close() + if err != nil { + return fmt.Errorf("unable to close nar reader: %w", err) + } + + return nil + case *castorev1pb.Node_Directory: + // We have a directory at the root, look it up and put in on the stack. + directory, err := directoryLookupFn(v.Directory.Digest) + if err != nil { + return fmt.Errorf("unable to lookup directory: %w", err) + } + stackDirectories = append(stackDirectories, directory) + stackPaths = append(stackPaths, "/") + + err = narWriter.WriteHeader(&nar.Header{ + Path: "/", + Type: nar.TypeDirectory, + }) + + if err != nil { + return fmt.Errorf("error writing header: %w", err) + } + } + + // as long as the stack is not empty, we keep running. + for { + if len(stackDirectories) == 0 { + return nil + } + + // Peek at the current top of the stack. + topOfStack := stackDirectories[len(stackDirectories)-1] + topOfStackPath := stackPaths[len(stackPaths)-1] + + // get the next element that's lexicographically smallest, and drain it from + // the current directory on top of the stack. + nextNode := drainNextNode(topOfStack) + + // If nextNode returns nil, there's nothing left in the directory node, so we + // can emit it from the stack. + // Contrary to the import case, we don't emit the node popping from the stack, but when pushing. + if nextNode == nil { + // pop off stack + stackDirectories = stackDirectories[:len(stackDirectories)-1] + stackPaths = stackPaths[:len(stackPaths)-1] + + continue + } + + switch n := (nextNode).(type) { + case *castorev1pb.DirectoryNode: + err := narWriter.WriteHeader(&nar.Header{ + Path: path.Join(topOfStackPath, string(n.GetName())), + Type: nar.TypeDirectory, + }) + if err != nil { + return fmt.Errorf("unable to write nar header: %w", err) + } + + d, err := directoryLookupFn(n.GetDigest()) + if err != nil { + return fmt.Errorf("unable to lookup directory: %w", err) + } + + // add to stack + stackDirectories = append(stackDirectories, d) + stackPaths = append(stackPaths, path.Join(topOfStackPath, string(n.GetName()))) + case *castorev1pb.FileNode: + err := narWriter.WriteHeader(&nar.Header{ + Path: path.Join(topOfStackPath, string(n.GetName())), + Type: nar.TypeRegular, + Size: int64(n.GetSize()), + Executable: n.GetExecutable(), + }) + if err != nil { + return fmt.Errorf("unable to write nar header: %w", err) + } + + // copy file contents + contentReader, err := blobLookupFn(n.GetDigest()) + if err != nil { + return fmt.Errorf("unable to get blob: %w", err) + } + defer contentReader.Close() + + _, err = io.Copy(narWriter, contentReader) + if err != nil { + return fmt.Errorf("unable to copy contents from contentReader: %w", err) + } + + err = contentReader.Close() + if err != nil { + return fmt.Errorf("unable to close content reader: %w", err) + } + case *castorev1pb.SymlinkNode: + err := narWriter.WriteHeader(&nar.Header{ + Path: path.Join(topOfStackPath, string(n.GetName())), + Type: nar.TypeSymlink, + LinkTarget: string(n.GetTarget()), + }) + if err != nil { + return fmt.Errorf("unable to write nar header: %w", err) + } + } + } +} + +// drainNextNode will drain a directory message with one of its child nodes, +// whichever comes first alphabetically. +func drainNextNode(d *castorev1pb.Directory) interface{} { + switch v := (smallestNode(d)).(type) { + case *castorev1pb.DirectoryNode: + d.Directories = d.Directories[1:] + return v + case *castorev1pb.FileNode: + d.Files = d.Files[1:] + return v + case *castorev1pb.SymlinkNode: + d.Symlinks = d.Symlinks[1:] + return v + case nil: + return nil + default: + panic("invalid type encountered") + } +} + +// smallestNode will return the node from a directory message, +// whichever comes first alphabetically. +func smallestNode(d *castorev1pb.Directory) interface{} { + childDirectories := d.GetDirectories() + childFiles := d.GetFiles() + childSymlinks := d.GetSymlinks() + + if len(childDirectories) > 0 { + if len(childFiles) > 0 { + if len(childSymlinks) > 0 { + // directories,files,symlinks + return smallerNode(smallerNode(childDirectories[0], childFiles[0]), childSymlinks[0]) + } else { + // directories,files,!symlinks + return smallerNode(childDirectories[0], childFiles[0]) + } + } else { + // directories,!files + if len(childSymlinks) > 0 { + // directories,!files,symlinks + return smallerNode(childDirectories[0], childSymlinks[0]) + } else { + // directories,!files,!symlinks + return childDirectories[0] + } + } + } else { + // !directories + if len(childFiles) > 0 { + // !directories,files + if len(childSymlinks) > 0 { + // !directories,files,symlinks + return smallerNode(childFiles[0], childSymlinks[0]) + } else { + // !directories,files,!symlinks + return childFiles[0] + } + } else { + //!directories,!files + if len(childSymlinks) > 0 { + //!directories,!files,symlinks + return childSymlinks[0] + } else { + //!directories,!files,!symlinks + return nil + } + } + } +} + +// smallerNode compares two nodes by their name, +// and returns the one with the smaller name. +// both nodes may not be nil, we do check for these cases in smallestNode. +func smallerNode(a interface{ GetName() []byte }, b interface{ GetName() []byte }) interface{ GetName() []byte } { + if string(a.GetName()) < string(b.GetName()) { + return a + } else { + return b + } +} diff --git a/tvix/nar-bridge/pkg/exporter/export_test.go b/tvix/store/protos/export_test.go index ffee62f29183..d45dd1ee7cf3 100644 --- a/tvix/nar-bridge/pkg/exporter/export_test.go +++ b/tvix/store/protos/export_test.go @@ -1,17 +1,12 @@ -package exporter_test +package storev1_test import ( "bytes" - "context" - "encoding/base64" - "fmt" "io" "os" "testing" castorev1pb "code.tvl.fyi/tvix/castore/protos" - "code.tvl.fyi/tvix/nar-bridge/pkg/exporter" - "code.tvl.fyi/tvix/nar-bridge/pkg/importer" storev1pb "code.tvl.fyi/tvix/store/protos" "github.com/stretchr/testify/require" "lukechampine.com/blake3" @@ -49,14 +44,14 @@ func TestSymlink(t *testing.T) { var buf bytes.Buffer - err := exporter.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) { + err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) { panic("no directories expected") }, func([]byte) (io.ReadCloser, error) { panic("no files expected") }) require.NoError(t, err, "exporter shouldn't fail") - f, err := os.Open("../../testdata/symlink.nar") + f, err := os.Open("testdata/symlink.nar") require.NoError(t, err) bytesExpected, err := io.ReadAll(f) @@ -90,7 +85,7 @@ func TestRegular(t *testing.T) { var buf bytes.Buffer - err := exporter.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) { + err := storev1pb.Export(&buf, pathInfo, func([]byte) (*castorev1pb.Directory, error) { panic("no directories expected") }, func(blobRef []byte) (io.ReadCloser, error) { if !bytes.Equal(blobRef, BLAKE3_DIGEST_0X01) { @@ -100,7 +95,7 @@ func TestRegular(t *testing.T) { }) require.NoError(t, err, "exporter shouldn't fail") - f, err := os.Open("../../testdata/onebyteregular.nar") + f, err := os.Open("testdata/onebyteregular.nar") require.NoError(t, err) bytesExpected, err := io.ReadAll(f) @@ -134,7 +129,7 @@ func TestEmptyDirectory(t *testing.T) { var buf bytes.Buffer - err := exporter.Export(&buf, pathInfo, func(directoryRef []byte) (*castorev1pb.Directory, error) { + err := storev1pb.Export(&buf, pathInfo, func(directoryRef []byte) (*castorev1pb.Directory, error) { if !bytes.Equal(directoryRef, emptyDirectoryDigest) { panic("unexpected directoryRef") } @@ -144,7 +139,7 @@ func TestEmptyDirectory(t *testing.T) { }) require.NoError(t, err, "exporter shouldn't fail") - f, err := os.Open("../../testdata/emptydirectory.nar") + f, err := os.Open("testdata/emptydirectory.nar") require.NoError(t, err) bytesExpected, err := io.ReadAll(f) @@ -154,67 +149,3 @@ func TestEmptyDirectory(t *testing.T) { require.Equal(t, bytesExpected, buf.Bytes(), "expected nar contents to match") } - -func TestFull(t *testing.T) { - // We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to the exporter, - // and store all the file contents and directory objects received in two hashmaps. - // We then feed it to the writer, and test we come up with the same NAR file. - - f, err := os.Open("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar") - require.NoError(t, err) - - narContents, err := io.ReadAll(f) - require.NoError(t, err) - - blobsMap := make(map[string][]byte, 0) - directoriesMap := make(map[string]*castorev1pb.Directory) - - pathInfo, err := importer.Import( - context.Background(), - bytes.NewBuffer(narContents), - func(blobReader io.Reader) ([]byte, error) { - // read in contents, we need to put it into filesMap later. - contents, err := io.ReadAll(blobReader) - require.NoError(t, err) - - dgst := mustBlobDigest(bytes.NewReader(contents)) - - // put it in filesMap - blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents - - return dgst, nil - }, - func(directory *castorev1pb.Directory) ([]byte, error) { - dgst := mustDirectoryDigest(directory) - - directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory - return dgst, nil - }, - ) - - require.NoError(t, err) - - // done populating everything, now actually test the export :-) - var buf bytes.Buffer - err = exporter.Export( - &buf, - pathInfo, - func(directoryDgst []byte) (*castorev1pb.Directory, error) { - d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)] - if !found { - panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst))) - } - return d, nil - }, - func(blobDgst []byte) (io.ReadCloser, error) { - blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)] - if !found { - panic(fmt.Sprintf("blob %v not found", base64.StdEncoding.EncodeToString(blobDgst))) - } - return io.NopCloser(bytes.NewReader(blobContents)), nil - }, - ) - - require.NoError(t, err, "exporter shouldn't fail") - require.Equal(t, narContents, buf.Bytes()) -} diff --git a/tvix/store/protos/go.mod b/tvix/store/protos/go.mod index fe3f7eff3ed8..34cc9c07acfa 100644 --- a/tvix/store/protos/go.mod +++ b/tvix/store/protos/go.mod @@ -4,10 +4,12 @@ go 1.19 require ( code.tvl.fyi/tvix/castore/protos v0.0.0-20230922125121-72355662d742 + github.com/google/go-cmp v0.5.6 github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b github.com/stretchr/testify v1.8.1 google.golang.org/grpc v1.51.0 google.golang.org/protobuf v1.31.0 + lukechampine.com/blake3 v1.1.7 ) require ( @@ -18,7 +20,7 @@ require ( golang.org/x/net v0.7.0 // indirect golang.org/x/sys v0.5.0 // indirect golang.org/x/text v0.7.0 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - lukechampine.com/blake3 v1.1.7 // indirect ) diff --git a/tvix/store/protos/go.sum b/tvix/store/protos/go.sum index 55bfb88e7b08..dd7d9bf0454d 100644 --- a/tvix/store/protos/go.sum +++ b/tvix/store/protos/go.sum @@ -28,6 +28,7 @@ github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMyw github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= +github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/nix-community/go-nix v0.0.0-20231009143713-ebca3299475b h1:AWEKOdDO3JnHApQDOmONEKLXbMCQJhYJJfJpiWB9VGI= @@ -70,6 +71,7 @@ golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3 golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= diff --git a/tvix/nar-bridge/pkg/exporter/pick_next_node_test.go b/tvix/store/protos/pick_next_node_test.go index c80261ae6b6a..830c6cacc136 100644 --- a/tvix/nar-bridge/pkg/exporter/pick_next_node_test.go +++ b/tvix/store/protos/pick_next_node_test.go @@ -1,4 +1,4 @@ -package exporter +package storev1 import ( "testing" diff --git a/tvix/store/protos/testdata/emptydirectory.nar b/tvix/store/protos/testdata/emptydirectory.nar new file mode 100644 index 000000000000..baba55862255 --- /dev/null +++ b/tvix/store/protos/testdata/emptydirectory.nar Binary files differdiff --git a/tvix/store/protos/testdata/onebyteregular.nar b/tvix/store/protos/testdata/onebyteregular.nar new file mode 100644 index 000000000000..b8c94932bf0c --- /dev/null +++ b/tvix/store/protos/testdata/onebyteregular.nar Binary files differdiff --git a/tvix/store/protos/testdata/symlink.nar b/tvix/store/protos/testdata/symlink.nar new file mode 100644 index 000000000000..7990e4ad5bc2 --- /dev/null +++ b/tvix/store/protos/testdata/symlink.nar Binary files differ |