about summary refs log tree commit diff
path: root/tvix/nar-bridge-go/pkg/importer
diff options
context:
space:
mode:
author: Florian Klink <flokli@flokli.de> 2024-05-14T10·35+0200
committer: clbot <clbot@tvl.fyi> 2024-05-15T21·31+0000
commit: 1392913e981ae4edbec6ef39a4d3de44749ad81c (patch)
tree: 899672eac93c185a11a125e2f8d1c41367edbf17 /tvix/nar-bridge-go/pkg/importer
parent: ce1aa10b694662a3bb4061184312de7a422cfe42 (diff)
chore(tvix/nar-bridge): move to nar-bridge-go r/8147
Make some space for the rust implementation.

Change-Id: I924dc1657be10abe5a11951c3b9de50bae06db19
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11662
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: yuka <yuka@yuka.dev>
Diffstat (limited to 'tvix/nar-bridge-go/pkg/importer')
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/blob_upload.go | 71
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/counting_writer.go | 21
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/directory_upload.go | 88
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go | 62
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/importer.go | 303
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/importer_test.go | 537
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/roundtrip_test.go | 85
-rw-r--r-- tvix/nar-bridge-go/pkg/importer/util_test.go | 34
8 files changed, 1201 insertions, 0 deletions
diff --git a/tvix/nar-bridge-go/pkg/importer/blob_upload.go b/tvix/nar-bridge-go/pkg/importer/blob_upload.go
new file mode 100644
index 000000000000..c1255dd3ad5d
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/blob_upload.go
@@ -0,0 +1,71 @@
+package importer
+
+import (
+	"bufio"
+	"context"
+	"encoding/base64"
+	"errors"
+	"fmt"
+	"io"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	log "github.com/sirupsen/logrus"
+)
+
+// chunkSize is the size (in bytes, 1 MiB) of the individual BlobChunk
+// messages we send when uploading to the BlobService.
+const chunkSize = 1024 * 1024
+
+// GenBlobUploaderCb produces a callback function that can be used as blobCb
+// for the importer.Import function call.
+//
+// Every invocation of the returned callback opens a new Put stream on
+// blobServiceClient, sends whatever it reads from the passed reader in chunks
+// of at most chunkSize bytes, closes the stream, and returns the digest from
+// the response. Errors from the stream or from the reader are wrapped and
+// returned.
+func GenBlobUploaderCb(ctx context.Context, blobServiceClient castorev1pb.BlobServiceClient) func(io.Reader) ([]byte, error) {
+	return func(blobReader io.Reader) ([]byte, error) {
+		// Give every upload its own cancellable context. Without this, bailing
+		// out on a read or send error would leave the Put stream open until
+		// the caller's ctx ends; cancelling on return releases it.
+		uploadCtx, cancel := context.WithCancel(ctx)
+		defer cancel()
+
+		// Ensure the blobReader is buffered to at least the chunk size.
+		blobReader = bufio.NewReaderSize(blobReader, chunkSize)
+
+		putter, err := blobServiceClient.Put(uploadCtx)
+		if err != nil {
+			// return error to the importer
+			return nil, fmt.Errorf("error from blob service: %w", err)
+		}
+
+		blobSize := 0
+		chunk := make([]byte, chunkSize)
+
+		for {
+			n, err := blobReader.Read(chunk)
+			if err != nil && !errors.Is(err, io.EOF) {
+				return nil, fmt.Errorf("unable to read from blobreader: %w", err)
+			}
+
+			// A read may return data together with io.EOF, so send first and
+			// check for EOF afterwards.
+			if n != 0 {
+				log.WithField("chunk_size", n).Debug("sending chunk")
+				blobSize += n
+
+				// send the blob chunk to the server. The err is only valid in the inner scope
+				if err := putter.Send(&castorev1pb.BlobChunk{
+					Data: chunk[:n],
+				}); err != nil {
+					return nil, fmt.Errorf("sending blob chunk: %w", err)
+				}
+			}
+
+			// if our read from blobReader returned an EOF, we're done reading
+			if errors.Is(err, io.EOF) {
+				break
+			}
+		}
+
+		resp, err := putter.CloseAndRecv()
+		if err != nil {
+			return nil, fmt.Errorf("close blob putter: %w", err)
+		}
+
+		log.WithFields(log.Fields{
+			"blob_digest": base64.StdEncoding.EncodeToString(resp.GetDigest()),
+			"blob_size":   blobSize,
+		}).Debug("uploaded blob")
+
+		return resp.GetDigest(), nil
+	}
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/counting_writer.go b/tvix/nar-bridge-go/pkg/importer/counting_writer.go
new file mode 100644
index 000000000000..d003a4b11bfd
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/counting_writer.go
@@ -0,0 +1,21 @@
+package importer
+
+import (
+	"io"
+)
+
+// Compile-time check that *CountingWriter satisfies io.Writer.
+var _ io.Writer = (*CountingWriter)(nil)
+
+// CountingWriter is an io.Writer that discards everything written to it and
+// only keeps track of how many bytes it has been handed in total.
+type CountingWriter struct {
+	bytesWritten uint64
+}
+
+// Write adds len(p) to the running total. It never fails and always reports
+// the whole slice as written.
+func (cw *CountingWriter) Write(p []byte) (n int, err error) {
+	n = len(p)
+	cw.bytesWritten += uint64(n)
+	return n, nil
+}
+
+// BytesWritten returns the number of bytes passed to Write so far.
+func (cw *CountingWriter) BytesWritten() uint64 {
+	return cw.bytesWritten
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/directory_upload.go b/tvix/nar-bridge-go/pkg/importer/directory_upload.go
new file mode 100644
index 000000000000..117f442fa54f
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/directory_upload.go
@@ -0,0 +1,88 @@
+package importer
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"fmt"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	log "github.com/sirupsen/logrus"
+)
+
+// DirectoriesUploader opens a Put stream when it receives the first Put() call,
+// and then uses the opened stream for subsequent Put() calls.
+// When the uploading is finished, a call to Done() will close the stream and
+// return the root digest returned from the directoryServiceClient.
+type DirectoriesUploader struct {
+	// ctx is the context handed to directoryServiceClient.Put when the
+	// stream gets opened.
+	ctx                       context.Context
+	// directoryServiceClient is the client the Put stream is opened on.
+	directoryServiceClient    castorev1pb.DirectoryServiceClient
+	// directoryServicePutStream is the currently open stream, or nil if
+	// none is open.
+	directoryServicePutStream castorev1pb.DirectoryService_PutClient
+	// lastDirectoryDigest is the digest of the directory most recently sent
+	// by Put(); Done() compares it against the root digest in the response.
+	lastDirectoryDigest       []byte
+}
+
+// NewDirectoriesUploader returns a DirectoriesUploader bound to ctx and
+// directoryServiceClient. No stream is opened here; that happens on the
+// first Put() call.
+func NewDirectoriesUploader(ctx context.Context, directoryServiceClient castorev1pb.DirectoryServiceClient) *DirectoriesUploader {
+	du := new(DirectoriesUploader)
+	du.ctx = ctx
+	du.directoryServiceClient = directoryServiceClient
+	// directoryServicePutStream is left at its zero value (nil).
+	return du
+}
+
+// Put sends directory over the Put stream, opening the stream first if none
+// is open yet. It returns the digest of the directory and remembers it as the
+// most recently uploaded one, which Done() later compares against the root
+// digest reported by the backend.
+func (du *DirectoriesUploader) Put(directory *castorev1pb.Directory) ([]byte, error) {
+	directoryDigest, err := directory.Digest()
+	if err != nil {
+		return nil, fmt.Errorf("failed calculating directory digest: %w", err)
+	}
+
+	// Send the directory to the directory service
+	// If the stream hasn't been initialized yet, do it first
+	if du.directoryServicePutStream == nil {
+		directoryServicePutStream, err := du.directoryServiceClient.Put(du.ctx)
+		if err != nil {
+			// wrap with %w so callers can still inspect the underlying error.
+			return nil, fmt.Errorf("unable to initialize directory service put stream: %w", err)
+		}
+		du.directoryServicePutStream = directoryServicePutStream
+	}
+
+	// send the directory out
+	err = du.directoryServicePutStream.Send(directory)
+	if err != nil {
+		return nil, fmt.Errorf("error sending directory: %w", err)
+	}
+	log.WithField("digest", base64.StdEncoding.EncodeToString(directoryDigest)).Debug("uploaded directory")
+
+	// update lastDirectoryDigest
+	du.lastDirectoryDigest = directoryDigest
+
+	return directoryDigest, nil
+}
+
+// Done closes the stream and returns the response.
+// It returns nil (and no error) if no stream is open, i.e. if Put() was never
+// called, or if Done() was already called since the last Put().
+// An error is returned if closing the stream fails, or if the root digest in
+// the response differs from the digest of the last directory sent by Put().
+func (du *DirectoriesUploader) Done() (*castorev1pb.PutDirectoryResponse, error) {
+	// only close once, and only if we opened.
+	if du.directoryServicePutStream == nil {
+		return nil, nil
+	}
+
+	// Detach the stream before closing it. Whatever happens below, the
+	// stream is finished afterwards, so neither a repeated Done() nor a
+	// later Put() may use it again.
+	stream := du.directoryServicePutStream
+	du.directoryServicePutStream = nil
+
+	putDirectoryResponse, err := stream.CloseAndRecv()
+	if err != nil {
+		return nil, fmt.Errorf("unable to close directory service put stream: %w", err)
+	}
+
+	// ensure the response contains the same digest as the one we have in lastDirectoryDigest.
+	// Otherwise, the backend came up with another digest than we did, in which case we return an error.
+	if !bytes.Equal(du.lastDirectoryDigest, putDirectoryResponse.RootDigest) {
+		return nil, fmt.Errorf(
+			"backend calculated different root digest as we, expected %s, actual %s",
+			base64.StdEncoding.EncodeToString(du.lastDirectoryDigest),
+			base64.StdEncoding.EncodeToString(putDirectoryResponse.RootDigest),
+		)
+	}
+
+	return putDirectoryResponse, nil
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go b/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go
new file mode 100644
index 000000000000..bdc298a9a399
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go
@@ -0,0 +1,62 @@
+package importer
+
+import (
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	storev1pb "code.tvl.fyi/tvix/store-go"
+	"fmt"
+	"github.com/nix-community/go-nix/pkg/narinfo"
+	"github.com/nix-community/go-nix/pkg/storepath"
+)
+
+// GenPathInfo assembles a PathInfo out of a root node and a parsed .narinfo.
+// The node embedded in the PathInfo is rootNode, renamed according to the
+// store path parsed from narInfo.StorePath.
+//
+// An error is returned if the store path or one of the references cannot be
+// parsed. If the assembled PathInfo does not pass its own Validate(), the
+// function panics, as that is treated as a bug in this code.
+//
+// NOTE(review): NarSha256 is filled from narInfo.FileHash. Presumably callers
+// only pass narinfos describing uncompressed NARs, where that equals the NAR
+// hash; confirm against the callers.
+func GenPathInfo(rootNode *castorev1pb.Node, narInfo *narinfo.NarInfo) (*storev1pb.PathInfo, error) {
+	// The .narinfo carries the absolute store path; parse it.
+	storePath, err := storepath.FromAbsolutePath(narInfo.StorePath)
+	if err != nil {
+		return nil, fmt.Errorf("unable to parse StorePath: %w", err)
+	}
+
+	// Every reference name is a store path of its own; only its digest ends
+	// up in References.
+	references := make([][]byte, 0, len(narInfo.References))
+	for _, referenceStr := range narInfo.References {
+		parsedReference, err := storepath.FromString(referenceStr)
+		if err != nil {
+			return nil, fmt.Errorf("unable to parse reference %s as storepath: %w", referenceStr, err)
+		}
+		references = append(references, parsedReference.Digest)
+	}
+
+	// Convert the narinfo signatures into their protobuf counterparts.
+	signatures := make([]*storev1pb.NARInfo_Signature, 0, len(narInfo.Signatures))
+	for _, sig := range narInfo.Signatures {
+		signatures = append(signatures, &storev1pb.NARInfo_Signature{
+			Name: sig.Name,
+			Data: sig.Data,
+		})
+	}
+
+	// Put everything together.
+	pathInfo := &storev1pb.PathInfo{
+		// embed a new root node, named after the parsed store path.
+		Node:       castorev1pb.RenamedNode(rootNode, storePath.String()),
+		References: references,
+		Narinfo: &storev1pb.NARInfo{
+			NarSize:        narInfo.NarSize,
+			NarSha256:      narInfo.FileHash.Digest(),
+			Signatures:     signatures,
+			ReferenceNames: narInfo.References,
+		},
+	}
+
+	// Validate the result as a sanity check on our own code before handing
+	// it out. A failure here is a programming error, so fail hard.
+	if _, validateErr := pathInfo.Validate(); validateErr != nil {
+		panic(fmt.Sprintf("PathInfo failed validation: %v", validateErr))
+	}
+
+	return pathInfo, nil
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/importer.go b/tvix/nar-bridge-go/pkg/importer/importer.go
new file mode 100644
index 000000000000..fce6c5f293da
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/importer.go
@@ -0,0 +1,303 @@
+package importer
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"errors"
+	"fmt"
+	"io"
+	"path"
+	"strings"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	"github.com/nix-community/go-nix/pkg/nar"
+	"golang.org/x/sync/errgroup"
+	"lukechampine.com/blake3"
+)
+
+const (
+	// asyncUploadThreshold controls when a file is buffered into memory and uploaded
+	// asynchronously. Files must be smaller than the threshold to be uploaded asynchronously.
+	asyncUploadThreshold = 1024 * 1024 // 1 MiB
+	// maxConcurrentAsyncUploads is the maximum number of async blob uploads allowed to be
+	// running concurrently at any given time for a single import operation.
+	maxConcurrentAsyncUploads = 128
+)
+
+// stackItem is an item on the directories stack kept by Import: a directory
+// that is still being filled with entries, together with its path.
+type stackItem struct {
+	// path is the path of the directory as reported in the NAR header.
+	path      string
+	// directory is the Directory message entries get appended to.
+	directory *castorev1pb.Directory
+}
+
+// Import reads a NAR from a reader, and returns the root node,
+// NAR size and NAR sha256 digest.
+//
+// blobCb is called once for every regular file. It has to read the passed
+// reader to the end and return the (32 byte) blake3 digest of what it read;
+// Import fails if the callback stops early or reports a digest different from
+// the one Import computed itself. Files smaller than asyncUploadThreshold are
+// buffered in memory and handed to blobCb from a separate goroutine, so
+// blobCb may be running several times concurrently.
+//
+// directoryCb is called once for every directory, after all of its entries
+// were added, and returns the digest its parent refers to it by.
+//
+// Import does not return before all asynchronous blobCb invocations have
+// finished, on error paths too.
+func Import(
+	// a context, to support cancellation
+	ctx context.Context,
+	// The reader the data is read from
+	r io.Reader,
+	// callback function called with each regular file content
+	blobCb func(fileReader io.Reader) ([]byte, error),
+	// callback function called with each finalized directory node
+	directoryCb func(directory *castorev1pb.Directory) ([]byte, error),
+) (*castorev1pb.Node, uint64, []byte, error) {
+	// We need to wrap the underlying reader a bit.
+	// - we want to keep track of the number of bytes read in total
+	// - we calculate the sha256 digest over all data read
+	// Express these two things in a MultiWriter, and give the NAR reader a
+	// TeeReader that writes to it.
+	narCountW := &CountingWriter{}
+	sha256W := sha256.New()
+	multiW := io.MultiWriter(narCountW, sha256W)
+	narReader, err := nar.NewReader(io.TeeReader(r, multiW))
+	if err != nil {
+		return nil, 0, nil, fmt.Errorf("failed to instantiate nar reader: %w", err)
+	}
+	defer narReader.Close()
+
+	// If we store a symlink or regular file at the root, these are not nil.
+	// If they are nil, we instead have a stackDirectory.
+	var rootSymlink *castorev1pb.SymlinkNode
+	var rootFile *castorev1pb.FileNode
+	var stackDirectory *castorev1pb.Directory
+
+	// Keep track of all async blob uploads so we can make sure they all succeed
+	// before returning.
+	var asyncBlobWg errgroup.Group
+	asyncBlobWg.SetLimit(maxConcurrentAsyncUploads)
+	// Never return while uploads are still in flight, otherwise blobCb would
+	// keep running after the caller got its result. The error is dropped on
+	// purpose: the success path has already consumed it via the explicit
+	// Wait() below, and on error paths the primary error is the one reported.
+	defer func() {
+		_ = asyncBlobWg.Wait()
+	}()
+
+	var stack = []stackItem{}
+
+	// popFromStack is used when we transition to a different directory or
+	// drain the stack when we reach the end of the NAR.
+	// It adds the popped element to the element underneath if any,
+	// and passes it to the directoryCb callback.
+	// This function may only be called if the stack is not already empty.
+	popFromStack := func() error {
+		// Keep the top item, and "resize" the stack slice.
+		// This will only make the last element inaccessible, but chances are high
+		// we're re-using that space anyways.
+		toPop := stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+
+		// call the directoryCb
+		directoryDigest, err := directoryCb(toPop.directory)
+		if err != nil {
+			return fmt.Errorf("failed calling directoryCb: %w", err)
+		}
+
+		// if there's still a parent left on the stack, refer to it from there.
+		if len(stack) > 0 {
+			topOfStack := stack[len(stack)-1].directory
+			topOfStack.Directories = append(topOfStack.Directories, &castorev1pb.DirectoryNode{
+				Name:   []byte(path.Base(toPop.path)),
+				Digest: directoryDigest,
+				Size:   toPop.directory.Size(),
+			})
+		}
+		// Keep track that we have encountered at least one directory
+		stackDirectory = toPop.directory
+		return nil
+	}
+
+	getBasename := func(p string) string {
+		// extract the basename. In case of "/", replace with empty string.
+		basename := path.Base(p)
+		if basename == "/" {
+			basename = ""
+		}
+		return basename
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil, 0, nil, ctx.Err()
+		default:
+			// call narReader.Next() to get the next element
+			hdr, err := narReader.Next()
+
+			// If this returns an error, it's either EOF (when we're done reading from the NAR),
+			// or another error.
+			if err != nil {
+				// if this returns no EOF, bail out
+				if !errors.Is(err, io.EOF) {
+					return nil, 0, nil, fmt.Errorf("failed getting next nar element: %w", err)
+				}
+
+				// The NAR has been read all the way to the end…
+				// Make sure we close the nar reader, which might read some final trailers.
+				if err := narReader.Close(); err != nil {
+					return nil, 0, nil, fmt.Errorf("unable to close nar reader: %w", err)
+				}
+
+				// Check the stack. While it's not empty, we need to pop things off the stack.
+				for len(stack) > 0 {
+					err := popFromStack()
+					if err != nil {
+						return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err)
+					}
+				}
+
+				// Wait for any pending blob uploads.
+				err := asyncBlobWg.Wait()
+				if err != nil {
+					return nil, 0, nil, fmt.Errorf("async blob upload: %w", err)
+				}
+
+				// Stack is empty.
+				// Now either root{File,Symlink,Directory} is not nil,
+				// and we can return the root node.
+				narSize := narCountW.BytesWritten()
+				narSha256 := sha256W.Sum(nil)
+
+				switch {
+				case rootFile != nil:
+					return &castorev1pb.Node{
+						Node: &castorev1pb.Node_File{
+							File: rootFile,
+						},
+					}, narSize, narSha256, nil
+				case rootSymlink != nil:
+					return &castorev1pb.Node{
+						Node: &castorev1pb.Node_Symlink{
+							Symlink: rootSymlink,
+						},
+					}, narSize, narSha256, nil
+				case stackDirectory != nil:
+					// calculate directory digest (i.e. after we received all its contents)
+					dgst, err := stackDirectory.Digest()
+					if err != nil {
+						return nil, 0, nil, fmt.Errorf("unable to calculate root directory digest: %w", err)
+					}
+
+					return &castorev1pb.Node{
+						Node: &castorev1pb.Node_Directory{
+							Directory: &castorev1pb.DirectoryNode{
+								Name:   []byte{},
+								Digest: dgst,
+								Size:   stackDirectory.Size(),
+							},
+						},
+					}, narSize, narSha256, nil
+				default:
+					return nil, 0, nil, fmt.Errorf("no root set")
+				}
+			}
+
+			// Check for valid path transitions, pop from stack if needed
+			// The nar reader already gives us some guarantees about ordering and illegal transitions,
+			// So we really only need to check if the top-of-stack path is a prefix of the path,
+			// and if it's not, pop from the stack. We do this repeatedly until the top of the stack is
+			// the subdirectory the new entry is in, or we hit the root directory.
+
+			// We don't need to worry about the root node case, because we can only finish the root "/"
+			// If we're at the end of the NAR reader (covered by the EOF check)
+			for len(stack) > 1 && !strings.HasPrefix(hdr.Path, stack[len(stack)-1].path+"/") {
+				err := popFromStack()
+				if err != nil {
+					return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err)
+				}
+			}
+
+			switch hdr.Type {
+			case nar.TypeSymlink:
+				symlinkNode := &castorev1pb.SymlinkNode{
+					Name:   []byte(getBasename(hdr.Path)),
+					Target: []byte(hdr.LinkTarget),
+				}
+				if len(stack) > 0 {
+					topOfStack := stack[len(stack)-1].directory
+					topOfStack.Symlinks = append(topOfStack.Symlinks, symlinkNode)
+				} else {
+					rootSymlink = symlinkNode
+				}
+
+			case nar.TypeRegular:
+				uploadBlob := func(r io.Reader) ([]byte, error) {
+					// wrap reader with a reader counting the number of bytes read
+					blobCountW := &CountingWriter{}
+					blobReader := io.TeeReader(r, blobCountW)
+
+					blobDigest, err := blobCb(blobReader)
+					if err != nil {
+						return nil, fmt.Errorf("failure from blobCb: %w", err)
+					}
+
+					// ensure blobCb did read all the way to the end.
+					// If it didn't, the blobCb function is wrong and we should bail out.
+					if blobCountW.BytesWritten() != uint64(hdr.Size) {
+						return nil, fmt.Errorf("blobCb did not read all: %d/%d bytes", blobCountW.BytesWritten(), hdr.Size)
+					}
+
+					return blobDigest, nil
+				}
+
+				// Hash the file contents ourselves while they are read, so the
+				// digest returned by blobCb can be checked against it.
+				h := blake3.New(32, nil)
+				blobReader := io.TeeReader(narReader, h)
+				var blobDigest []byte
+
+				// If this file is small enough, read it off the wire immediately and
+				// upload to the blob service asynchronously. This helps reduce the
+				// RTT on blob uploads for NARs with many small files.
+				doAsync := hdr.Size < asyncUploadThreshold
+				if doAsync {
+					blobContents, err := io.ReadAll(blobReader)
+					if err != nil {
+						return nil, 0, nil, fmt.Errorf("read blob: %w", err)
+					}
+
+					blobDigest = h.Sum(nil)
+
+					asyncBlobWg.Go(func() error {
+						blobDigestFromCb, err := uploadBlob(bytes.NewReader(blobContents))
+						if err != nil {
+							return err
+						}
+
+						if !bytes.Equal(blobDigest, blobDigestFromCb) {
+							return fmt.Errorf("unexpected digest (got %x, expected %x)", blobDigestFromCb, blobDigest)
+						}
+
+						return nil
+					})
+				} else {
+					blobDigestFromCb, err := uploadBlob(blobReader)
+					if err != nil {
+						return nil, 0, nil, fmt.Errorf("upload blob: %w", err)
+					}
+
+					blobDigest = h.Sum(nil)
+					if !bytes.Equal(blobDigest, blobDigestFromCb) {
+						return nil, 0, nil, fmt.Errorf("unexpected digest (got %x, expected %x)", blobDigestFromCb, blobDigest)
+					}
+				}
+
+				fileNode := &castorev1pb.FileNode{
+					Name:       []byte(getBasename(hdr.Path)),
+					Digest:     blobDigest,
+					Size:       uint64(hdr.Size),
+					Executable: hdr.Executable,
+				}
+				if len(stack) > 0 {
+					topOfStack := stack[len(stack)-1].directory
+					topOfStack.Files = append(topOfStack.Files, fileNode)
+				} else {
+					rootFile = fileNode
+				}
+
+			case nar.TypeDirectory:
+				directory := &castorev1pb.Directory{
+					Directories: []*castorev1pb.DirectoryNode{},
+					Files:       []*castorev1pb.FileNode{},
+					Symlinks:    []*castorev1pb.SymlinkNode{},
+				}
+				stack = append(stack, stackItem{
+					directory: directory,
+					path:      hdr.Path,
+				})
+			}
+		}
+	}
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/importer_test.go b/tvix/nar-bridge-go/pkg/importer/importer_test.go
new file mode 100644
index 000000000000..313677084f71
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/importer_test.go
@@ -0,0 +1,537 @@
+package importer_test
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"io"
+	"os"
+	"testing"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	"code.tvl.fyi/tvix/nar-bridge-go/pkg/importer"
+	"github.com/stretchr/testify/require"
+)
+
+// TestSymlink imports a NAR consisting of a single symlink at the root, and
+// checks the returned root node, NAR size and NAR sha256 digest. Neither
+// callback is expected to be invoked.
+func TestSymlink(t *testing.T) {
+	f, err := os.Open("../../testdata/symlink.nar")
+	require.NoError(t, err)
+	// release the file handle once the test is done.
+	defer f.Close()
+
+	rootNode, narSize, narSha256, err := importer.Import(
+		context.Background(),
+		f,
+		func(blobReader io.Reader) ([]byte, error) {
+			panic("no file contents expected!")
+		}, func(directory *castorev1pb.Directory) ([]byte, error) {
+			panic("no directories expected!")
+		},
+	)
+	require.NoError(t, err)
+	require.Equal(t, &castorev1pb.Node{
+		Node: &castorev1pb.Node_Symlink{
+			Symlink: &castorev1pb.SymlinkNode{
+				Name:   []byte(""),
+				Target: []byte("/nix/store/somewhereelse"),
+			},
+		},
+	}, rootNode)
+	require.Equal(t, []byte{
+		0x09, 0x7d, 0x39, 0x7e, 0x9b, 0x58, 0x26, 0x38, 0x4e, 0xaa, 0x16, 0xc4, 0x57, 0x71, 0x5d, 0x1c, 0x1a, 0x51, 0x67, 0x03, 0x13, 0xea, 0xd0, 0xf5, 0x85, 0x66, 0xe0, 0xb2, 0x32, 0x53, 0x9c, 0xf1,
+	}, narSha256)
+	require.Equal(t, uint64(136), narSize)
+}
+
+// TestRegular imports a NAR consisting of a single one-byte regular file at
+// the root, and checks the contents handed to the blob callback as well as
+// the returned root node, NAR size and NAR sha256 digest.
+func TestRegular(t *testing.T) {
+	f, err := os.Open("../../testdata/onebyteregular.nar")
+	require.NoError(t, err)
+	// release the file handle once the test is done.
+	defer f.Close()
+
+	rootNode, narSize, narSha256, err := importer.Import(
+		context.Background(),
+		f,
+		func(blobReader io.Reader) ([]byte, error) {
+			// Import hands small files to this callback from a separate
+			// goroutine, so failures are reported with t.Errorf here:
+			// require.* ends up in t.FailNow, which must only be called
+			// from the goroutine running the test.
+			contents, err := io.ReadAll(blobReader)
+			if err != nil {
+				t.Errorf("reading blobReader should not error: %v", err)
+				return nil, err
+			}
+			if !bytes.Equal([]byte{0x01}, contents) {
+				t.Errorf("contents read from blobReader should match expectations, got %x", contents)
+			}
+			return mustBlobDigest(bytes.NewBuffer(contents)), nil
+		}, func(directory *castorev1pb.Directory) ([]byte, error) {
+			panic("no directories expected!")
+		},
+	)
+	require.NoError(t, err)
+
+	// The blake3 digest of the 0x01 byte.
+	blake3Digest0x01 := []byte{
+		0x48, 0xfc, 0x72, 0x1f, 0xbb, 0xc1, 0x72, 0xe0, 0x92, 0x5f, 0xa2, 0x7a, 0xf1, 0x67, 0x1d,
+		0xe2, 0x25, 0xba, 0x92, 0x71, 0x34, 0x80, 0x29, 0x98, 0xb1, 0x0a, 0x15, 0x68, 0xa1, 0x88,
+		0x65, 0x2b,
+	}
+
+	require.Equal(t, &castorev1pb.Node{
+		Node: &castorev1pb.Node_File{
+			File: &castorev1pb.FileNode{
+				Name:       []byte(""),
+				Digest:     blake3Digest0x01,
+				Size:       1,
+				Executable: false,
+			},
+		},
+	}, rootNode)
+	require.Equal(t, []byte{
+		0x73, 0x08, 0x50, 0xa8, 0x11, 0x25, 0x9d, 0xbf, 0x3a, 0x68, 0xdc, 0x2e, 0xe8, 0x7a, 0x79, 0xaa, 0x6c, 0xae, 0x9f, 0x71, 0x37, 0x5e, 0xdf, 0x39, 0x6f, 0x9d, 0x7a, 0x91, 0xfb, 0xe9, 0x13, 0x4d,
+	}, narSha256)
+	require.Equal(t, uint64(120), narSize)
+}
+
+// TestEmptyDirectory imports a NAR consisting of a single empty directory at
+// the root, and checks the directory handed to the directory callback as well
+// as the returned root node, NAR size and NAR sha256 digest.
+func TestEmptyDirectory(t *testing.T) {
+	f, err := os.Open("../../testdata/emptydirectory.nar")
+	require.NoError(t, err)
+	// release the file handle once the test is done.
+	defer f.Close()
+
+	expectedDirectory := &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files:       []*castorev1pb.FileNode{},
+		Symlinks:    []*castorev1pb.SymlinkNode{},
+	}
+	rootNode, narSize, narSha256, err := importer.Import(
+		context.Background(),
+		f,
+		func(blobReader io.Reader) ([]byte, error) {
+			panic("no file contents expected!")
+		}, func(directory *castorev1pb.Directory) ([]byte, error) {
+			requireProtoEq(t, expectedDirectory, directory)
+			return mustDirectoryDigest(directory), nil
+		},
+	)
+	require.NoError(t, err)
+	require.Equal(t, &castorev1pb.Node{
+		Node: &castorev1pb.Node_Directory{
+			Directory: &castorev1pb.DirectoryNode{
+				Name:   []byte(""),
+				Digest: mustDirectoryDigest(expectedDirectory),
+				Size:   expectedDirectory.Size(),
+			},
+		},
+	}, rootNode)
+	require.Equal(t, []byte{
+		0xa5, 0x0a, 0x5a, 0xb6, 0xd9, 0x92, 0xf5, 0x59, 0x8e, 0xdd, 0x92, 0x10, 0x50, 0x59, 0xfa, 0xe9, 0xac, 0xfc, 0x19, 0x29, 0x81, 0xe0, 0x8b, 0xd8, 0x85, 0x34, 0xc2, 0x16, 0x7e, 0x92, 0x52, 0x6a,
+	}, narSha256)
+	require.Equal(t, uint64(96), narSize)
+}
+
+func TestFull(t *testing.T) {
+	f, err := os.Open("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar")
+	require.NoError(t, err)
+
+	expectedDirectoryPaths := []string{
+		"/bin",
+		"/share/man/man1",
+		"/share/man/man5",
+		"/share/man/man8",
+		"/share/man",
+		"/share",
+		"/",
+	}
+	expectedDirectories := make(map[string]*castorev1pb.Directory, len(expectedDirectoryPaths))
+
+	// /bin is a leaf directory
+	expectedDirectories["/bin"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files: []*castorev1pb.FileNode{
+			{
+				Name: []byte("arp"),
+				Digest: []byte{
+					0xfb, 0xc4, 0x61, 0x4a, 0x29, 0x27, 0x11, 0xcb, 0xcc, 0xe4, 0x99, 0x81, 0x9c, 0xf0, 0xa9, 0x17, 0xf7, 0xd0, 0x91, 0xbe, 0xea, 0x08, 0xcb, 0x5b, 0xaa, 0x76, 0x76, 0xf5, 0xee, 0x4f, 0x82, 0xbb,
+				},
+				Size:       55288,
+				Executable: true,
+			},
+			{
+				Name: []byte("hostname"),
+				Digest: []byte{
+					0x9c, 0x6a, 0xe4, 0xb5, 0xe4, 0x6c, 0xb5, 0x67, 0x45, 0x0e, 0xaa, 0x2a, 0xd8, 0xdd, 0x9b, 0x38, 0xd7, 0xed, 0x01, 0x02, 0x84, 0xf7, 0x26, 0xe1, 0xc7, 0xf3, 0x1c, 0xeb, 0xaa, 0x8a, 0x01, 0x30,
+				},
+				Size:       17704,
+				Executable: true,
+			},
+			{
+				Name: []byte("ifconfig"),
+				Digest: []byte{
+					0x25, 0xbe, 0x3b, 0x1d, 0xf4, 0x1a, 0x45, 0x42, 0x79, 0x09, 0x2c, 0x2a, 0x83, 0xf0, 0x0b, 0xff, 0xe8, 0xc0, 0x9c, 0x26, 0x98, 0x70, 0x15, 0x4d, 0xa8, 0xca, 0x05, 0xfe, 0x92, 0x68, 0x35, 0x2e,
+				},
+				Size:       72576,
+				Executable: true,
+			},
+			{
+				Name: []byte("nameif"),
+				Digest: []byte{
+					0x8e, 0xaa, 0xc5, 0xdb, 0x71, 0x08, 0x8e, 0xe5, 0xe6, 0x30, 0x1f, 0x2c, 0x3a, 0xf2, 0x42, 0x39, 0x0c, 0x57, 0x15, 0xaf, 0x50, 0xaa, 0x1c, 0xdf, 0x84, 0x22, 0x08, 0x77, 0x03, 0x54, 0x62, 0xb1,
+				},
+				Size:       18776,
+				Executable: true,
+			},
+			{
+				Name: []byte("netstat"),
+				Digest: []byte{
+					0x13, 0x34, 0x7e, 0xdd, 0x2a, 0x9a, 0x17, 0x0b, 0x3f, 0xc7, 0x0a, 0xe4, 0x92, 0x89, 0x25, 0x9f, 0xaa, 0xb5, 0x05, 0x6b, 0x24, 0xa7, 0x91, 0xeb, 0xaf, 0xf9, 0xe9, 0x35, 0x56, 0xaa, 0x2f, 0xb2,
+				},
+				Size:       131784,
+				Executable: true,
+			},
+			{
+				Name: []byte("plipconfig"),
+				Digest: []byte{
+					0x19, 0x7c, 0x80, 0xdc, 0x81, 0xdc, 0xb4, 0xc0, 0x45, 0xe1, 0xf9, 0x76, 0x51, 0x4f, 0x50, 0xbf, 0xa4, 0x69, 0x51, 0x9a, 0xd4, 0xa9, 0xe7, 0xaa, 0xe7, 0x0d, 0x53, 0x32, 0xff, 0x28, 0x40, 0x60,
+				},
+				Size:       13160,
+				Executable: true,
+			},
+			{
+				Name: []byte("rarp"),
+				Digest: []byte{
+					0x08, 0x85, 0xb4, 0x85, 0x03, 0x2b, 0x3c, 0x7a, 0x3e, 0x24, 0x4c, 0xf8, 0xcc, 0x45, 0x01, 0x9e, 0x79, 0x43, 0x8c, 0x6f, 0x5e, 0x32, 0x46, 0x54, 0xb6, 0x68, 0x91, 0x8e, 0xa0, 0xcb, 0x6e, 0x0d,
+				},
+				Size:       30384,
+				Executable: true,
+			},
+			{
+				Name: []byte("route"),
+				Digest: []byte{
+					0x4d, 0x14, 0x20, 0x89, 0x9e, 0x76, 0xf4, 0xe2, 0x92, 0x53, 0xee, 0x9b, 0x78, 0x7d, 0x23, 0x80, 0x6c, 0xff, 0xe6, 0x33, 0xdc, 0x4a, 0x10, 0x29, 0x39, 0x02, 0xa0, 0x60, 0xff, 0xe2, 0xbb, 0xd7,
+				},
+				Size:       61928,
+				Executable: true,
+			},
+			{
+				Name: []byte("slattach"),
+				Digest: []byte{
+					0xfb, 0x25, 0xc3, 0x73, 0xb7, 0xb1, 0x0b, 0x25, 0xcd, 0x7b, 0x62, 0xf6, 0x71, 0x83, 0xfe, 0x36, 0x80, 0xf6, 0x48, 0xc3, 0xdb, 0xd8, 0x0c, 0xfe, 0xb8, 0xd3, 0xda, 0x32, 0x9b, 0x47, 0x4b, 0x05,
+				},
+				Size:       35672,
+				Executable: true,
+			},
+		},
+		Symlinks: []*castorev1pb.SymlinkNode{
+			{
+				Name:   []byte("dnsdomainname"),
+				Target: []byte("hostname"),
+			},
+			{
+				Name:   []byte("domainname"),
+				Target: []byte("hostname"),
+			},
+			{
+				Name:   []byte("nisdomainname"),
+				Target: []byte("hostname"),
+			},
+			{
+				Name:   []byte("ypdomainname"),
+				Target: []byte("hostname"),
+			},
+		},
+	}
+
+	// /share/man/man1 is a leaf directory.
+	// The parser traversed over /sbin, but only added it to / which is still on the stack.
+	expectedDirectories["/share/man/man1"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files: []*castorev1pb.FileNode{
+			{
+				Name: []byte("dnsdomainname.1.gz"),
+				Digest: []byte{
+					0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee,
+				},
+				Size:       40,
+				Executable: false,
+			},
+			{
+				Name: []byte("domainname.1.gz"),
+				Digest: []byte{
+					0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee,
+				},
+				Size:       40,
+				Executable: false,
+			},
+			{
+				Name: []byte("hostname.1.gz"),
+				Digest: []byte{
+					0xbf, 0x89, 0xe6, 0x28, 0x00, 0x24, 0x66, 0x79, 0x70, 0x04, 0x38, 0xd6, 0xdd, 0x9d, 0xf6, 0x0e, 0x0d, 0xee, 0x00, 0xf7, 0x64, 0x4f, 0x05, 0x08, 0x9d, 0xf0, 0x36, 0xde, 0x85, 0xf4, 0x75, 0xdb,
+				},
+				Size:       1660,
+				Executable: false,
+			},
+			{
+				Name: []byte("nisdomainname.1.gz"),
+				Digest: []byte{
+					0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee,
+				},
+				Size:       40,
+				Executable: false,
+			},
+			{
+				Name: []byte("ypdomainname.1.gz"),
+				Digest: []byte{
+					0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee,
+				},
+				Size:       40,
+				Executable: false,
+			},
+		},
+		Symlinks: []*castorev1pb.SymlinkNode{},
+	}
+
+	// /share/man/man5 is a leaf directory
+	expectedDirectories["/share/man/man5"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files: []*castorev1pb.FileNode{
+			{
+				Name: []byte("ethers.5.gz"),
+				Digest: []byte{
+					0x42, 0x63, 0x8c, 0xc4, 0x18, 0x93, 0xcf, 0x60, 0xd6, 0xff, 0x43, 0xbc, 0x16, 0xb4, 0xfd, 0x22, 0xd2, 0xf2, 0x05, 0x0b, 0x52, 0xdc, 0x6a, 0x6b, 0xff, 0x34, 0xe2, 0x6a, 0x38, 0x3a, 0x07, 0xe3,
+				},
+				Size:       563,
+				Executable: false,
+			},
+		},
+		Symlinks: []*castorev1pb.SymlinkNode{},
+	}
+
+	// /share/man/man8 is a leaf directory
+	expectedDirectories["/share/man/man8"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{},
+		Files: []*castorev1pb.FileNode{
+			{
+				Name: []byte("arp.8.gz"),
+				Digest: []byte{
+					0xf5, 0x35, 0x4e, 0xf5, 0xf6, 0x44, 0xf7, 0x52, 0x0f, 0x42, 0xa0, 0x26, 0x51, 0xd9, 0x89, 0xf9, 0x68, 0xf2, 0xef, 0xeb, 0xba, 0xe1, 0xf4, 0x55, 0x01, 0x57, 0x77, 0xb7, 0x68, 0x55, 0x92, 0xef,
+				},
+				Size:       2464,
+				Executable: false,
+			},
+			{
+				Name: []byte("ifconfig.8.gz"),
+				Digest: []byte{
+					0x18, 0x65, 0x25, 0x11, 0x32, 0xee, 0x77, 0x91, 0x35, 0x4c, 0x3c, 0x24, 0xdb, 0xaf, 0x66, 0xdb, 0xfc, 0x17, 0x7b, 0xba, 0xe1, 0x3d, 0x05, 0xd2, 0xca, 0x6e, 0x2c, 0xe4, 0xef, 0xb8, 0xa8, 0xbe,
+				},
+				Size:       3382,
+				Executable: false,
+			},
+			{
+				Name: []byte("nameif.8.gz"),
+				Digest: []byte{
+					0x73, 0xc1, 0x27, 0xe8, 0x3b, 0xa8, 0x49, 0xdc, 0x0e, 0xdf, 0x70, 0x5f, 0xaf, 0x06, 0x01, 0x2c, 0x62, 0xe9, 0x18, 0x67, 0x01, 0x94, 0x64, 0x26, 0xca, 0x95, 0x22, 0xc0, 0xdc, 0xe4, 0x42, 0xb6,
+				},
+				Size:       523,
+				Executable: false,
+			},
+			{
+				Name: []byte("netstat.8.gz"),
+				Digest: []byte{
+					0xc0, 0x86, 0x43, 0x4a, 0x43, 0x57, 0xaa, 0x84, 0xa7, 0x24, 0xa0, 0x7c, 0x65, 0x38, 0x46, 0x1c, 0xf2, 0x45, 0xa2, 0xef, 0x12, 0x44, 0x18, 0xba, 0x52, 0x56, 0xe9, 0x8e, 0x6a, 0x0f, 0x70, 0x63,
+				},
+				Size:       4284,
+				Executable: false,
+			},
+			{
+				Name: []byte("plipconfig.8.gz"),
+				Digest: []byte{
+					0x2a, 0xd9, 0x1d, 0xa8, 0x9e, 0x0d, 0x05, 0xd0, 0xb0, 0x49, 0xaa, 0x64, 0xba, 0x29, 0x28, 0xc6, 0x45, 0xe1, 0xbb, 0x5e, 0x72, 0x8d, 0x48, 0x7b, 0x09, 0x4f, 0x0a, 0x82, 0x1e, 0x26, 0x83, 0xab,
+				},
+				Size:       889,
+				Executable: false,
+			},
+			{
+				Name: []byte("rarp.8.gz"),
+				Digest: []byte{
+					0x3d, 0x51, 0xc1, 0xd0, 0x6a, 0x59, 0x1e, 0x6d, 0x9a, 0xf5, 0x06, 0xd2, 0xe7, 0x7d, 0x7d, 0xd0, 0x70, 0x3d, 0x84, 0x64, 0xc3, 0x7d, 0xfb, 0x10, 0x84, 0x3b, 0xe1, 0xa9, 0xdf, 0x46, 0xee, 0x9f,
+				},
+				Size:       1198,
+				Executable: false,
+			},
+			{
+				Name: []byte("route.8.gz"),
+				Digest: []byte{
+					0x2a, 0x5a, 0x4b, 0x4f, 0x91, 0xf2, 0x78, 0xe4, 0xa9, 0x25, 0xb2, 0x7f, 0xa7, 0x2a, 0xc0, 0x8a, 0x4a, 0x65, 0xc9, 0x5f, 0x07, 0xa0, 0x48, 0x44, 0xeb, 0x46, 0xf9, 0xc9, 0xe1, 0x17, 0x96, 0x21,
+				},
+				Size:       3525,
+				Executable: false,
+			},
+			{
+				Name: []byte("slattach.8.gz"),
+				Digest: []byte{
+					0x3f, 0x05, 0x6b, 0x20, 0xe1, 0xe4, 0xf0, 0xba, 0x16, 0x15, 0x66, 0x6b, 0x57, 0x96, 0xe9, 0x9d, 0x83, 0xa8, 0x20, 0xaf, 0x8a, 0xca, 0x16, 0x4d, 0xa2, 0x6d, 0x94, 0x8e, 0xca, 0x91, 0x8f, 0xd4,
+				},
+				Size:       1441,
+				Executable: false,
+			},
+		},
+		Symlinks: []*castorev1pb.SymlinkNode{},
+	}
+
+	// /share/man holds /share/man/man{1,5,8}.
+	expectedDirectories["/share/man"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{
+			{
+				Name:   []byte("man1"),
+				Digest: mustDirectoryDigest(expectedDirectories["/share/man/man1"]),
+				Size:   expectedDirectories["/share/man/man1"].Size(),
+			},
+			{
+				Name:   []byte("man5"),
+				Digest: mustDirectoryDigest(expectedDirectories["/share/man/man5"]),
+				Size:   expectedDirectories["/share/man/man5"].Size(),
+			},
+			{
+				Name:   []byte("man8"),
+				Digest: mustDirectoryDigest(expectedDirectories["/share/man/man8"]),
+				Size:   expectedDirectories["/share/man/man8"].Size(),
+			},
+		},
+		Files:    []*castorev1pb.FileNode{},
+		Symlinks: []*castorev1pb.SymlinkNode{},
+	}
+
+	// /share holds /share/man.
+	expectedDirectories["/share"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{
+			{
+				Name:   []byte("man"),
+				Digest: mustDirectoryDigest(expectedDirectories["/share/man"]),
+				Size:   expectedDirectories["/share/man"].Size(),
+			},
+		},
+		Files:    []*castorev1pb.FileNode{},
+		Symlinks: []*castorev1pb.SymlinkNode{},
+	}
+
+	// / holds /bin, /share, and a /sbin symlink.
+	expectedDirectories["/"] = &castorev1pb.Directory{
+		Directories: []*castorev1pb.DirectoryNode{
+			{
+				Name:   []byte("bin"),
+				Digest: mustDirectoryDigest(expectedDirectories["/bin"]),
+				Size:   expectedDirectories["/bin"].Size(),
+			},
+			{
+				Name:   []byte("share"),
+				Digest: mustDirectoryDigest(expectedDirectories["/share"]),
+				Size:   expectedDirectories["/share"].Size(),
+			},
+		},
+		Files: []*castorev1pb.FileNode{},
+		Symlinks: []*castorev1pb.SymlinkNode{
+			{
+				Name:   []byte("sbin"),
+				Target: []byte("bin"),
+			},
+		},
+	}
+	// assert we populated the two fixtures properly
+	require.Equal(t, len(expectedDirectoryPaths), len(expectedDirectories))
+
+	numDirectoriesReceived := 0
+
+	rootNode, narSize, narSha256, err := importer.Import(
+		context.Background(),
+		f,
+		func(blobReader io.Reader) ([]byte, error) {
+			// Don't really bother reading and comparing the contents here,
+			// We already verify the right digests are produced by comparing the
+			// directoryCb calls, and TestRegular ensures the reader works.
+			return mustBlobDigest(blobReader), nil
+		}, func(directory *castorev1pb.Directory) ([]byte, error) {
+			// use actualDirectoryOrder to look up the Directory object we expect at this specific invocation.
+			currentDirectoryPath := expectedDirectoryPaths[numDirectoriesReceived]
+
+			expectedDirectory, found := expectedDirectories[currentDirectoryPath]
+			require.True(t, found, "must find the current directory")
+
+			requireProtoEq(t, expectedDirectory, directory)
+
+			numDirectoriesReceived += 1
+			return mustDirectoryDigest(directory), nil
+		},
+	)
+	require.NoError(t, err)
+	require.Equal(t, &castorev1pb.Node{
+		Node: &castorev1pb.Node_Directory{
+			Directory: &castorev1pb.DirectoryNode{
+				Name:   []byte(""),
+				Digest: mustDirectoryDigest(expectedDirectories["/"]),
+				Size:   expectedDirectories["/"].Size(),
+			},
+		},
+	}, rootNode)
+	require.Equal(t, []byte{
+		0xc6, 0xe1, 0x55, 0xb3, 0x45, 0x6e, 0x30, 0xb7, 0x61, 0x22, 0x63, 0xec, 0x09, 0x50, 0x70, 0x81, 0x1c, 0xaf, 0x8a, 0xbf, 0xd5, 0x9f, 0xaa, 0x72, 0xab, 0x82, 0xa5, 0x92, 0xef, 0xde, 0xb2, 0x53,
+	}, narSha256)
+	require.Equal(t, uint64(464152), narSize)
+}
+
+// TestCallbackErrors ensures that errors returned from the callback function
+// bubble up to the importer process, and are not ignored.
+//
+// Each subtest feeds importer.Import a NAR that exercises one of the two
+// callbacks, lets that callback return a sentinel error, and asserts with
+// require.ErrorIs that the sentinel is part of the error Import returns.
+// The callback that is not expected to run for the fixture panics.
+func TestCallbackErrors(t *testing.T) {
+	t.Run("callback blob", func(t *testing.T) {
+		// Pick an example NAR with a regular file.
+		f, err := os.Open("../../testdata/onebyteregular.nar")
+		require.NoError(t, err)
+		// Release the file handle once this subtest finishes.
+		defer f.Close()
+
+		targetErr := errors.New("expected error")
+
+		_, _, _, err = importer.Import(
+			context.Background(),
+			f,
+			func(blobReader io.Reader) ([]byte, error) {
+				return nil, targetErr
+			}, func(directory *castorev1pb.Directory) ([]byte, error) {
+				panic("no directories expected!")
+			},
+		)
+		require.ErrorIs(t, err, targetErr)
+	})
+	t.Run("callback directory", func(t *testing.T) {
+		// Pick an example NAR with a directory node
+		f, err := os.Open("../../testdata/emptydirectory.nar")
+		require.NoError(t, err)
+		// Release the file handle once this subtest finishes.
+		defer f.Close()
+
+		targetErr := errors.New("expected error")
+
+		_, _, _, err = importer.Import(
+			context.Background(),
+			f,
+			func(blobReader io.Reader) ([]byte, error) {
+				panic("no file contents expected!")
+			}, func(directory *castorev1pb.Directory) ([]byte, error) {
+				return nil, targetErr
+			},
+		)
+		require.ErrorIs(t, err, targetErr)
+	})
+}
+
+// TestPopDirectories is a regression test for the importer's handling of its
+// directory stack.
+//
+// The fixture NAR has this layout:
+//
+// / (dir)
+// /test (dir)
+// /test/tested (file)
+// /tested (file)
+//
+// An earlier bug placed the second `tested` file inside `/test`, because
+// `/test` is a string prefix of `/tested`.
+func TestPopDirectories(t *testing.T) {
+	narFile, err := os.Open("../../testdata/popdirectories.nar")
+	require.NoError(t, err)
+	defer narFile.Close()
+
+	// Blob contents are not inspected here; only a digest is handed back.
+	blobCb := func(blobReader io.Reader) ([]byte, error) {
+		return mustBlobDigest(blobReader), nil
+	}
+
+	// Every Directory the importer emits has to pass its own validation.
+	directoryCb := func(directory *castorev1pb.Directory) ([]byte, error) {
+		require.NoError(t, directory.Validate(), "directory validation shouldn't error")
+		return mustDirectoryDigest(directory), nil
+	}
+
+	_, _, _, err = importer.Import(context.Background(), narFile, blobCb, directoryCb)
+	require.NoError(t, err)
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go b/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go
new file mode 100644
index 000000000000..c50d332d85dc
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go
@@ -0,0 +1,85 @@
+package importer_test
+
+import (
+	"bytes"
+	"context"
+	"encoding/base64"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"testing"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	"code.tvl.fyi/tvix/nar-bridge-go/pkg/importer"
+	storev1pb "code.tvl.fyi/tvix/store-go"
+	"github.com/stretchr/testify/require"
+)
+
+// TestRoundtrip imports a NAR file via importer.Import, keeps every blob and
+// Directory handed to the callbacks in memory, and then renders the resulting
+// root node back into a NAR with storev1pb.Export. The exported bytes must be
+// equal to the contents of the original NAR file.
+func TestRoundtrip(t *testing.T) {
+	// We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to
+	// importer.Import, and store all the file contents and directory objects
+	// received in two hashmaps.
+	// We then feed them to storev1pb.Export, and test we come up with the same
+	// NAR file.
+
+	// os.ReadFile opens, reads and closes the fixture in one step, so no file
+	// handle stays open for the remainder of the test.
+	narContents, err := os.ReadFile("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar")
+	require.NoError(t, err)
+
+	// Both maps are keyed by the base64-encoded digest. mu guards writes to
+	// blobsMap.
+	// NOTE(review): directoriesMap is written without holding mu; presumably
+	// the directory callback is never invoked concurrently - confirm.
+	var mu sync.Mutex
+	blobsMap := make(map[string][]byte)
+	directoriesMap := make(map[string]*castorev1pb.Directory)
+
+	rootNode, _, _, err := importer.Import(
+		context.Background(),
+		bytes.NewBuffer(narContents),
+		func(blobReader io.Reader) ([]byte, error) {
+			// read in contents, we need to put it into blobsMap later.
+			contents, err := io.ReadAll(blobReader)
+			require.NoError(t, err)
+
+			dgst := mustBlobDigest(bytes.NewReader(contents))
+
+			// put it in blobsMap
+			mu.Lock()
+			blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents
+			mu.Unlock()
+
+			return dgst, nil
+		},
+		func(directory *castorev1pb.Directory) ([]byte, error) {
+			dgst := mustDirectoryDigest(directory)
+
+			directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory
+			return dgst, nil
+		},
+	)
+
+	require.NoError(t, err)
+
+	// done populating everything, now actually test the export :-)
+	var narBuf bytes.Buffer
+	err = storev1pb.Export(
+		&narBuf,
+		rootNode,
+		// Directory lookups are served from directoriesMap; a miss means the
+		// exporter asked for something the importer never produced.
+		func(directoryDgst []byte) (*castorev1pb.Directory, error) {
+			d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)]
+			if !found {
+				panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst)))
+			}
+			return d, nil
+		},
+		// Blob lookups are served from blobsMap in the same way.
+		func(blobDgst []byte) (io.ReadCloser, error) {
+			blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)]
+			if !found {
+				panic(fmt.Sprintf("blob      %v not found", base64.StdEncoding.EncodeToString(blobDgst)))
+			}
+			return io.NopCloser(bytes.NewReader(blobContents)), nil
+		},
+	)
+
+	require.NoError(t, err, "exporter shouldn't fail")
+	require.Equal(t, narContents, narBuf.Bytes())
+}
diff --git a/tvix/nar-bridge-go/pkg/importer/util_test.go b/tvix/nar-bridge-go/pkg/importer/util_test.go
new file mode 100644
index 000000000000..06353cf582e5
--- /dev/null
+++ b/tvix/nar-bridge-go/pkg/importer/util_test.go
@@ -0,0 +1,34 @@
+package importer_test
+
+import (
+	"io"
+	"testing"
+
+	castorev1pb "code.tvl.fyi/tvix/castore-go"
+	"github.com/google/go-cmp/cmp"
+	"google.golang.org/protobuf/testing/protocmp"
+	"lukechampine.com/blake3"
+)
+
+// requireProtoEq compares expected and actual with cmp.Diff using
+// protocmp.Transform, and reports a test error containing the diff if they
+// differ. It reports through t.Errorf, so the calling test keeps running
+// after a mismatch.
+func requireProtoEq(t *testing.T, expected interface{}, actual interface{}) {
+	// Mark this function as a helper so a failure is attributed to the
+	// caller's line rather than to this file.
+	t.Helper()
+
+	if diff := cmp.Diff(expected, actual, protocmp.Transform()); diff != "" {
+		t.Errorf("unexpected difference:\n%v", diff)
+	}
+}
+
+// mustDirectoryDigest returns the digest reported by d.Digest(), and panics
+// with the returned error if that call fails.
+func mustDirectoryDigest(d *castorev1pb.Directory) []byte {
+	digest, digestErr := d.Digest()
+	if digestErr == nil {
+		return digest
+	}
+	panic(digestErr)
+}
+
+// mustBlobDigest streams everything from r into a blake3 hasher created with
+// a size argument of 32 and returns the resulting sum. It panics if copying
+// from r fails.
+func mustBlobDigest(r io.Reader) []byte {
+	h := blake3.New(32, nil)
+	if _, copyErr := io.Copy(h, r); copyErr != nil {
+		panic(copyErr)
+	}
+	return h.Sum(nil)
+}