From 0ecd10bf307049b9833e69f331ec049ae8840d85 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 19 Nov 2022 20:34:49 +0000 Subject: feat(tvix/nar-bridge): init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides a Nix HTTP Binary Cache interface in front of a tvix-store that's reachable via gRPC. TODOs: - remove import command, move serve up to toplevel. We have nix-copy- closure and tvix-store commands. - loop into CI. We should be able to fetch the protos as a third-party dependency. - Check if we can test nar-bridge slightly easier in an integration test. - Ensure we support connecting to unix sockets and grpc+http at least, using the same syntax as tvix-store. - Don't buffer the entire blob when rendering NAR Co-Authored-By: Connor Brewster Co-Authored-By: Márton Boros Co-Authored-By: Vo Minh Thu Change-Id: I6064474e49dfe78cea67676957462d9f28658d4a Reviewed-on: https://cl.tvl.fyi/c/depot/+/9339 Tested-by: BuildkiteCI Reviewed-by: tazjin --- tvix/nar-bridge/pkg/reader/reader.go | 264 +++++++++++++++++++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 tvix/nar-bridge/pkg/reader/reader.go (limited to 'tvix/nar-bridge/pkg/reader/reader.go') diff --git a/tvix/nar-bridge/pkg/reader/reader.go b/tvix/nar-bridge/pkg/reader/reader.go new file mode 100644 index 000000000000..6d34cd168db4 --- /dev/null +++ b/tvix/nar-bridge/pkg/reader/reader.go @@ -0,0 +1,264 @@ +package reader + +import ( + "context" + "crypto/sha256" + "errors" + "fmt" + "io" + "path" + "strings" + + storev1pb "code.tvl.fyi/tvix/store/protos" + "github.com/nix-community/go-nix/pkg/nar" + "lukechampine.com/blake3" +) + +type Reader struct { + hrSha256 *Hasher +} + +// An item on the directories stack +type item struct { + path string + directory *storev1pb.Directory +} + +func New(r io.Reader) *Reader { + // Instead of using the underlying reader itself, wrap the reader + // with a hasher calculating sha256 and one calculating sha512, + // and feed that one into the NAR reader. + hrSha256 := NewHasher(r, sha256.New()) + + return &Reader{ + hrSha256: hrSha256, + } +} + +// Import reads from the internally-wrapped reader, +// and calls the callback functions whenever regular file contents are +// encountered, or a Directory node is about to be finished. +func (r *Reader) Import( + ctx context.Context, + // callback function called with each regular file content + fileCb func(fileReader io.Reader) error, + // callback function called with each finalized directory node + directoryCb func(directory *storev1pb.Directory) error, +) (*storev1pb.PathInfo, error) { + + // construct a NAR reader, by reading through hrSha256 + narReader, err := nar.NewReader(r.hrSha256) + if err != nil { + return nil, fmt.Errorf("failed to instantiate nar reader: %w", err) + } + defer narReader.Close() + + // If we store a symlink or regular file at the root, these are not nil. + // If they are nil, we instead have a stackDirectory. + var rootSymlink *storev1pb.SymlinkNode + var rootFile *storev1pb.FileNode + var stackDirectory *storev1pb.Directory + + var stack = []item{} + + // popFromStack is used when we transition to a different directory or + // drain the stack when we reach the end of the NAR. + // It adds the popped element to the element underneath if any, + // and passes it to the directoryCb callback. + // This function may only be called if the stack is not already empty. + popFromStack := func() error { + // Keep the top item, and "resize" the stack slice. + // This will only make the last element unaccessible, but chances are high + // we're re-using that space anyways. + toPop := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + // if there's still a parent left on the stack, refer to it from there. + if len(stack) > 0 { + dgst, err := toPop.directory.Digest() + if err != nil { + return fmt.Errorf("unable to calculate directory digest: %w", err) + } + + topOfStack := stack[len(stack)-1].directory + topOfStack.Directories = append(topOfStack.Directories, &storev1pb.DirectoryNode{ + Name: []byte(path.Base(toPop.path)), + Digest: dgst, + Size: toPop.directory.Size(), + }) + } + // call the directoryCb + if err := directoryCb(toPop.directory); err != nil { + return fmt.Errorf("failed calling directoryCb: %w", err) + } + // Keep track that we have encounter at least one directory + stackDirectory = toPop.directory + return nil + } + + // Assemble a PathInfo struct, the Node is populated later. + assemblePathInfo := func() *storev1pb.PathInfo { + return &storev1pb.PathInfo{ + Node: nil, + References: [][]byte{}, + Narinfo: &storev1pb.NARInfo{ + NarSize: uint64(r.hrSha256.BytesWritten()), + NarSha256: r.hrSha256.Sum(nil), + Signatures: []*storev1pb.NARInfo_Signature{}, + ReferenceNames: []string{}, + }, + } + } + + getBasename := func(p string) string { + // extract the basename. In case of "/", replace with empty string. + basename := path.Base(p) + if basename == "/" { + basename = "" + } + return basename + } + + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + // call narReader.Next() to get the next element + hdr, err := narReader.Next() + + // If this returns an error, it's either EOF (when we're done reading from the NAR), + // or another error + if err != nil { + // if this returns no EOF, bail out + if !errors.Is(err, io.EOF) { + return nil, fmt.Errorf("failed getting next nar element: %w", err) + } + + // The NAR has been read all the way to the end… + // Make sure we close the nar reader, which might read some final trailers. + if err := narReader.Close(); err != nil { + return nil, fmt.Errorf("unable to close nar reader: %w", err) + } + + // Check the stack. While it's not empty, we need to pop things off the stack. + for len(stack) > 0 { + err := popFromStack() + if err != nil { + return nil, fmt.Errorf("unable to pop from stack: %w", err) + } + + } + + // Stack is empty. We now either have a regular or symlink root node, or we encountered at least one directory. + // assemble pathInfo with these and return. + pi := assemblePathInfo() + if rootFile != nil { + pi.Node = &storev1pb.Node{ + Node: &storev1pb.Node_File{ + File: rootFile, + }, + } + } + if rootSymlink != nil { + pi.Node = &storev1pb.Node{ + Node: &storev1pb.Node_Symlink{ + Symlink: rootSymlink, + }, + } + } + if stackDirectory != nil { + // calculate directory digest (i.e. after we received all its contents) + dgst, err := stackDirectory.Digest() + if err != nil { + return nil, fmt.Errorf("unable to calculate root directory digest: %w", err) + } + + pi.Node = &storev1pb.Node{ + Node: &storev1pb.Node_Directory{ + Directory: &storev1pb.DirectoryNode{ + Name: []byte{}, + Digest: dgst, + Size: stackDirectory.Size(), + }, + }, + } + } + return pi, nil + } + + // Check for valid path transitions, pop from stack if needed + // The nar reader already gives us some guarantees about ordering and illegal transitions, + // So we really only need to check if the top-of-stack path is a prefix of the path, + // and if it's not, pop from the stack. + + // We don't need to worry about the root node case, because we can only finish the root "/" + // If we're at the end of the NAR reader (covered by the EOF check) + if len(stack) > 0 && !strings.HasPrefix(hdr.Path, stack[len(stack)-1].path) { + err := popFromStack() + if err != nil { + return nil, fmt.Errorf("unable to pop from stack: %w", err) + } + } + + if hdr.Type == nar.TypeSymlink { + symlinkNode := &storev1pb.SymlinkNode{ + Name: []byte(getBasename(hdr.Path)), + Target: []byte(hdr.LinkTarget), + } + if len(stack) > 0 { + topOfStack := stack[len(stack)-1].directory + topOfStack.Symlinks = append(topOfStack.Symlinks, symlinkNode) + } else { + rootSymlink = symlinkNode + } + + } + if hdr.Type == nar.TypeRegular { + // wrap reader with a reader calculating the blake3 hash + fileReader := NewHasher(narReader, blake3.New(32, nil)) + + err := fileCb(fileReader) + if err != nil { + return nil, fmt.Errorf("failure from fileCb: %w", err) + } + + // drive the file reader to the end, in case the CB function doesn't read + // all the way to the end on its own + if fileReader.BytesWritten() != uint32(hdr.Size) { + _, err := io.ReadAll(fileReader) + if err != nil { + return nil, fmt.Errorf("unable to read until the end of the file content: %w", err) + } + } + + // read the blake3 hash + dgst := fileReader.Sum(nil) + + fileNode := &storev1pb.FileNode{ + Name: []byte(getBasename(hdr.Path)), + Digest: dgst, + Size: uint32(hdr.Size), + Executable: hdr.Executable, + } + if len(stack) > 0 { + topOfStack := stack[len(stack)-1].directory + topOfStack.Files = append(topOfStack.Files, fileNode) + } else { + rootFile = fileNode + } + } + if hdr.Type == nar.TypeDirectory { + directory := &storev1pb.Directory{ + Directories: []*storev1pb.DirectoryNode{}, + Files: []*storev1pb.FileNode{}, + Symlinks: []*storev1pb.SymlinkNode{}, + } + stack = append(stack, item{ + directory: directory, + path: hdr.Path, + }) + } + } + } +} -- cgit 1.4.1