diff options
author | Florian Klink <flokli@flokli.de> | 2024-05-14T10·35+0200 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-05-15T21·31+0000 |
commit | 1392913e981ae4edbec6ef39a4d3de44749ad81c (patch) | |
tree | 899672eac93c185a11a125e2f8d1c41367edbf17 /tvix/nar-bridge-go | |
parent | ce1aa10b694662a3bb4061184312de7a422cfe42 (diff) |
chore(tvix/nar-bridge): move to nar-bridge-go r/8147
Make some space for the rust implementation. Change-Id: I924dc1657be10abe5a11951c3b9de50bae06db19 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11662 Tested-by: BuildkiteCI Autosubmit: flokli <flokli@flokli.de> Reviewed-by: yuka <yuka@yuka.dev>
Diffstat (limited to 'tvix/nar-bridge-go')
28 files changed, 2346 insertions, 0 deletions
diff --git a/tvix/nar-bridge-go/.gitignore b/tvix/nar-bridge-go/.gitignore new file mode 100644 index 000000000000..d70e1f8120cc --- /dev/null +++ b/tvix/nar-bridge-go/.gitignore @@ -0,0 +1,2 @@ +/nar-bridge-http +/nar-bridge-pathinfo diff --git a/tvix/nar-bridge-go/README.md b/tvix/nar-bridge-go/README.md new file mode 100644 index 000000000000..81431daf38ff --- /dev/null +++ b/tvix/nar-bridge-go/README.md @@ -0,0 +1,7 @@ +# //tvix/nar-bridge-go + +This exposes a HTTP Binary cache interface (GET/HEAD/PUT requests) for a `tvix- +store`. + +It can be used to configure a tvix-store as a substitutor for Nix, or to upload +store paths from Nix via `nix copy` into a `tvix-store`. diff --git a/tvix/nar-bridge-go/cmd/nar-bridge-http/main.go b/tvix/nar-bridge-go/cmd/nar-bridge-http/main.go new file mode 100644 index 000000000000..cf2aaf4901b2 --- /dev/null +++ b/tvix/nar-bridge-go/cmd/nar-bridge-http/main.go @@ -0,0 +1,93 @@ +package main + +import ( + "context" + "os" + "os/signal" + "runtime/debug" + "time" + + "github.com/alecthomas/kong" + + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + narBridgeHttp "code.tvl.fyi/tvix/nar-bridge-go/pkg/http" + storev1pb "code.tvl.fyi/tvix/store-go" + log "github.com/sirupsen/logrus" +) + +// `help:"Expose a tvix-store gRPC Interface as HTTP NAR/NARinfo"` +var cli struct { + LogLevel string `enum:"trace,debug,info,warn,error,fatal,panic" help:"The log level to log with" default:"info"` + ListenAddr string `name:"listen-addr" help:"The address this service listens on" type:"string" default:"[::]:9000"` //nolint:lll + EnableAccessLog bool `name:"access-log" help:"Enable access logging" type:"bool" default:"true" negatable:""` //nolint:lll + StoreAddr string `name:"store-addr" help:"The address to the tvix-store RPC interface this will connect to" default:"localhost:8000"` //nolint:lll + EnableOtlp bool `name:"otlp" help:"Enable OpenTelemetry for logs, spans, and metrics" default:"true"` //nolint:lll +} + +func main() { + _ = kong.Parse(&cli) + + logLevel, err := log.ParseLevel(cli.LogLevel) + if err != nil { + log.Panic("invalid log level") + return + } + log.SetLevel(logLevel) + + ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) + defer stop() + + if cli.EnableOtlp { + buildInfo, ok := debug.ReadBuildInfo() + if !ok { + log.Fatal("failed to read build info") + } + + shutdown, err := setupOpenTelemetry(ctx, "nar-bridge-http", buildInfo.Main.Version) + if err != nil { + log.WithError(err).Fatal("failed to setup OpenTelemetry") + } + defer shutdown(context.Background()) + } + + // connect to tvix-store + log.Debugf("Dialing to %v", cli.StoreAddr) + conn, err := grpc.DialContext(ctx, cli.StoreAddr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithStatsHandler(otelgrpc.NewClientHandler()), + ) + if err != nil { + log.Fatalf("did not connect: %v", err) + } + defer conn.Close() + + s := narBridgeHttp.New( + castorev1pb.NewDirectoryServiceClient(conn), + castorev1pb.NewBlobServiceClient(conn), + storev1pb.NewPathInfoServiceClient(conn), + cli.EnableAccessLog, + 30, + ) + + log.Printf("Starting nar-bridge-http at %v", cli.ListenAddr) + go s.ListenAndServe(cli.ListenAddr) + + // listen for the interrupt signal. + <-ctx.Done() + + // Restore default behaviour on the interrupt signal + stop() + log.Info("Received Signal, shutting down, press Ctl+C again to force.") + + timeoutCtx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + if err := s.Shutdown(timeoutCtx); err != nil { + log.WithError(err).Warn("failed to shutdown") + os.Exit(1) + } +} diff --git a/tvix/nar-bridge-go/cmd/nar-bridge-http/otel.go b/tvix/nar-bridge-go/cmd/nar-bridge-http/otel.go new file mode 100644 index 000000000000..c446c6ec1a14 --- /dev/null +++ b/tvix/nar-bridge-go/cmd/nar-bridge-http/otel.go @@ -0,0 +1,87 @@ +package main + +import ( + "context" + "errors" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" +) + +func setupOpenTelemetry(ctx context.Context, serviceName, serviceVersion string) (func(context.Context) error, error) { + var shutdownFuncs []func(context.Context) error + shutdown := func(ctx context.Context) error { + var err error + for _, fn := range shutdownFuncs { + err = errors.Join(err, fn(ctx)) + } + shutdownFuncs = nil + return err + } + + res, err := resource.Merge( + resource.Default(), + resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceName(serviceName), + semconv.ServiceVersion(serviceVersion), + ), + ) + if err != nil { + return nil, errors.Join(err, shutdown(ctx)) + } + + prop := propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + ) + otel.SetTextMapPropagator(prop) + + tracerProvider, err := newTraceProvider(ctx, res) + if err != nil { + return nil, errors.Join(err, shutdown(ctx)) + } + shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown) + otel.SetTracerProvider(tracerProvider) + + meterProvider, err := newMeterProvider(ctx, res) + if err != nil { + return nil, errors.Join(err, shutdown(ctx)) + } + shutdownFuncs = append(shutdownFuncs, meterProvider.Shutdown) + otel.SetMeterProvider(meterProvider) + + return shutdown, nil +} + +func newTraceProvider(ctx context.Context, res *resource.Resource) (*trace.TracerProvider, error) { + traceExporter, err := otlptracegrpc.New(ctx) + if err != nil { + return nil, err + } + + traceProvider := trace.NewTracerProvider( + trace.WithBatcher(traceExporter), + trace.WithResource(res), + ) + return traceProvider, nil +} + +func newMeterProvider(ctx context.Context, res *resource.Resource) (*metric.MeterProvider, error) { + metricExporter, err := otlpmetricgrpc.New(ctx) + if err != nil { + return nil, err + } + + meterProvider := metric.NewMeterProvider( + metric.WithResource(res), + metric.WithReader(metric.NewPeriodicReader(metricExporter)), + ) + return meterProvider, nil +} diff --git a/tvix/nar-bridge-go/default.nix b/tvix/nar-bridge-go/default.nix new file mode 100644 index 000000000000..303d9c504114 --- /dev/null +++ b/tvix/nar-bridge-go/default.nix @@ -0,0 +1,10 @@ +# Target containing just the proto files. + +{ depot, pkgs, lib, ... }: + +pkgs.buildGoModule { + name = "nar-bridge-go"; + src = depot.third_party.gitignoreSource ./.; + + vendorHash = "sha256-7jugbC5sEGhppjiZgnoLP5A6kQSaHK9vE6cXVZBG22s="; +} diff --git a/tvix/nar-bridge-go/go.mod b/tvix/nar-bridge-go/go.mod new file mode 100644 index 000000000000..3aa0694ff7b1 --- /dev/null +++ b/tvix/nar-bridge-go/go.mod @@ -0,0 +1,54 @@ +module code.tvl.fyi/tvix/nar-bridge-go + +require ( + code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e + code.tvl.fyi/tvix/store-go v0.0.0-20231105203234-f2baad42494f + github.com/alecthomas/kong v0.7.1 + github.com/go-chi/chi v1.5.4 + github.com/go-chi/chi/v5 v5.0.7 + github.com/google/go-cmp v0.6.0 + github.com/multiformats/go-multihash v0.2.1 + github.com/nix-community/go-nix v0.0.0-20231012070617-9b176785e54d + github.com/sirupsen/logrus v1.9.0 + github.com/stretchr/testify v1.8.4 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.47.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 + go.opentelemetry.io/otel v1.22.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.45.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0 + go.opentelemetry.io/otel/sdk v1.22.0 + go.opentelemetry.io/otel/sdk/metric v1.22.0 + golang.org/x/sync v0.4.0 + google.golang.org/grpc v1.60.1 + google.golang.org/protobuf v1.32.0 + lukechampine.com/blake3 v1.2.1 +) + +require ( + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect + github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect + github.com/multiformats/go-varint v0.0.6 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 // indirect + go.opentelemetry.io/otel/metric v1.22.0 // indirect + go.opentelemetry.io/otel/trace v1.22.0 // indirect + go.opentelemetry.io/proto/otlp v1.0.0 // indirect + golang.org/x/crypto v0.18.0 // indirect + golang.org/x/net v0.20.0 // indirect + golang.org/x/sys v0.16.0 // indirect + golang.org/x/text v0.14.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) + +go 1.19 diff --git a/tvix/nar-bridge-go/go.sum b/tvix/nar-bridge-go/go.sum new file mode 100644 index 000000000000..39f77b906128 --- /dev/null +++ b/tvix/nar-bridge-go/go.sum @@ -0,0 +1,120 @@ +cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= +cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= +code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e h1:Nj+anfyEYeEdhnIo2BG/N1ZwQl1IvI7AH3TbNDLwUOA= +code.tvl.fyi/tvix/castore-go v0.0.0-20231105151352-990d6ba2175e/go.mod h1:+vKbozsa04yy2TWh3kUVU568jaza3Hf0p1jAEoMoCwA= +code.tvl.fyi/tvix/store-go v0.0.0-20231105203234-f2baad42494f h1:bN3K7oSu3IAHXqS3ETHUgpBPHF9+awKKBRLiM8/1tmI= +code.tvl.fyi/tvix/store-go v0.0.0-20231105203234-f2baad42494f/go.mod h1:8jpfSC2rGi6VKaKOqqgmflPVSEpUawuRQFwQpQYCMiA= +github.com/alecthomas/assert/v2 v2.1.0 h1:tbredtNcQnoSd3QBhQWI7QZ3XHOVkw1Moklp2ojoH/0= +github.com/alecthomas/kong v0.7.1 h1:azoTh0IOfwlAX3qN9sHWTxACE2oV8Bg2gAwBsMwDQY4= +github.com/alecthomas/kong v0.7.1/go.mod h1:n1iCIO2xS46oE8ZfYCNDqdR0b0wZNrXAIAqro/2132U= +github.com/alecthomas/repr v0.1.0 h1:ENn2e1+J3k09gyj2shc0dHr/yjaWSHRlrJ4DPMevDqE= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-chi/chi v1.5.4 h1:QHdzF2szwjqVV4wmByUnTcsbIg7UGaQ0tPF2t5GcAIs= +github.com/go-chi/chi v1.5.4/go.mod h1:uaf8YgoFazUOkPBG7fxPftUylNumIev9awIWOENIuEg= +github.com/go-chi/chi/v5 v5.0.7 h1:rDTPXLDHGATaeHvVlLcR4Qe0zftYethFucbjVQ1PxU8= +github.com/go-chi/chi/v5 v5.0.7/go.mod h1:DslCQbL2OYiznFReuXYUmQ2hGd1aDpCnlMNITLSKoi8= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= +github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= +github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= +github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= +github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= +github.com/multiformats/go-multihash v0.2.1/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= +github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= +github.com/nix-community/go-nix v0.0.0-20231012070617-9b176785e54d h1:kwc1ivTuStqa3iBC2M/ojWPor88+YeIbZGeD2SlMYZ0= +github.com/nix-community/go-nix v0.0.0-20231012070617-9b176785e54d/go.mod h1:4ZJah5sYrUSsWXIOJIsQ6iVOQyLO+ffhWXU3gblcO+E= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= +github.com/sirupsen/logrus v1.9.0 h1:trlNQbNUG3OdDrDil03MCb1H2o9nJ1x4/5LYw7byDE0= +github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.47.0 h1:UNQQKPfTDe1J81ViolILjTKPr9WetKW6uei2hFgJmFs= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.47.0/go.mod h1:r9vWsPS/3AQItv3OSlEJ/E4mbrhUbbw18meOjArPtKQ= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0 h1:sv9kVfal0MK0wBMCOGr+HeJm9v803BkJxGrk2au7j08= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.47.0/go.mod h1:SK2UL73Zy1quvRPonmOmRDiWk1KBV3LyIeeIxcEApWw= +go.opentelemetry.io/otel v1.22.0 h1:xS7Ku+7yTFvDfDraDIJVpw7XPyuHlB9MCiqqX5mcJ6Y= +go.opentelemetry.io/otel v1.22.0/go.mod h1:eoV4iAi3Ea8LkAEI9+GFT44O6T/D0GWAVFyZVCC6pMI= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.45.0 h1:tfil6di0PoNV7FZdsCS7A5izZoVVQ7AuXtyekbOpG/I= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.45.0/go.mod h1:AKFZIEPOnqB00P63bTjOiah4ZTaRzl1TKwUWpZdYUHI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0 h1:9M3+rhx7kZCIQQhQRYaZCdNu1V73tm4TvXs2ntl98C4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.22.0/go.mod h1:noq80iT8rrHP1SfybmPiRGc9dc5M8RPmGvtwo7Oo7tc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0 h1:H2JFgRcGiyHg7H7bwcwaQJYrNFqCqrbTQ8K4p1OvDu8= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.22.0/go.mod h1:WfCWp1bGoYK8MeULtI15MmQVczfR+bFkk0DF3h06QmQ= +go.opentelemetry.io/otel/metric v1.22.0 h1:lypMQnGyJYeuYPhOM/bgjbFM6WE44W1/T45er4d8Hhg= +go.opentelemetry.io/otel/metric v1.22.0/go.mod h1:evJGjVpZv0mQ5QBRJoBF64yMuOf4xCWdXjK8pzFvliY= +go.opentelemetry.io/otel/sdk v1.22.0 h1:6coWHw9xw7EfClIC/+O31R8IY3/+EiRFHevmHafB2Gw= +go.opentelemetry.io/otel/sdk v1.22.0/go.mod h1:iu7luyVGYovrRpe2fmj3CVKouQNdTOkxtLzPvPz1DOc= +go.opentelemetry.io/otel/sdk/metric v1.22.0 h1:ARrRetm1HCVxq0cbnaZQlfwODYJHo3gFL8Z3tSmHBcI= +go.opentelemetry.io/otel/sdk/metric v1.22.0/go.mod h1:KjQGeMIDlBNEOo6HvjhxIec1p/69/kULDcp4gr0oLQQ= +go.opentelemetry.io/otel/trace v1.22.0 h1:Hg6pPujv0XG9QaVbGOBVHunyuLcCC3jN7WEhPx83XD0= +go.opentelemetry.io/otel/trace v1.22.0/go.mod h1:RbbHXVqKES9QhzZq/fE5UnOSILqRt40a21sPw2He1xo= +go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= +go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc= +golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= +golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= +golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU= +golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/genproto v0.0.0-20231016165738-49dd2c1f3d0b h1:+YaDE2r2OG8t/z5qmsh7Y+XXwCbvadxxZ0YY6mTdrVA= +google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97 h1:W18sezcAYs+3tDZX4F80yctqa12jcP1PUS2gQu1zTPU= +google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97/go.mod h1:iargEX0SFPm3xcfMI0d1domjg0ZF4Aa0p2awqyxhvF0= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405 h1:AB/lmRny7e2pLhFEYIbl5qkDAUt2h0ZRO4wGPhZf+ik= +google.golang.org/genproto/googleapis/rpc v0.0.0-20231030173426-d783a09b4405/go.mod h1:67X1fPuzjcrkymZzZV1vvkFeTn2Rvc6lYF9MYFGCcwE= +google.golang.org/grpc v1.60.1 h1:26+wFr+cNqSGFcOXcabYC0lUVJVRa2Sb2ortSK7VrEU= +google.golang.org/grpc v1.60.1/go.mod h1:OlCHIeLYqSSsLi6i49B5QGdzaMZK9+M7LXN2FKz4eGM= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I= +google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI= +lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= diff --git a/tvix/nar-bridge-go/pkg/http/nar_get.go b/tvix/nar-bridge-go/pkg/http/nar_get.go new file mode 100644 index 000000000000..75797f8da90e --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/nar_get.go @@ -0,0 +1,197 @@ +package http + +import ( + "bytes" + "context" + "encoding/base64" + "encoding/hex" + "errors" + "fmt" + "io" + "io/fs" + "net/http" + "sync" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + storev1pb "code.tvl.fyi/tvix/store-go" + "github.com/go-chi/chi/v5" + nixhash "github.com/nix-community/go-nix/pkg/hash" + "github.com/nix-community/go-nix/pkg/nixbase32" + log "github.com/sirupsen/logrus" +) + +const ( + narUrl = "/nar/{narhash:^([" + nixbase32.Alphabet + "]{52})$}.nar" +) + +func renderNar( + ctx context.Context, + log *log.Entry, + directoryServiceClient castorev1pb.DirectoryServiceClient, + blobServiceClient castorev1pb.BlobServiceClient, + narHashDbMu *sync.Mutex, + narHashDb map[string]*narData, + w io.Writer, + narHash *nixhash.Hash, + headOnly bool, +) error { + // look in the lookup table + narHashDbMu.Lock() + narData, found := narHashDb[narHash.SRIString()] + narHashDbMu.Unlock() + + rootNode := narData.rootNode + + // if we didn't find anything, return 404. + if !found { + return fmt.Errorf("narHash not found: %w", fs.ErrNotExist) + } + + // if this was only a head request, we're done. + if headOnly { + return nil + } + + directories := make(map[string]*castorev1pb.Directory) + + // If the root node is a directory, ask the directory service for all directories + if pathInfoDirectory := rootNode.GetDirectory(); pathInfoDirectory != nil { + rootDirectoryDigest := pathInfoDirectory.GetDigest() + log = log.WithField("root_directory", base64.StdEncoding.EncodeToString(rootDirectoryDigest)) + + directoryStream, err := directoryServiceClient.Get(ctx, &castorev1pb.GetDirectoryRequest{ + ByWhat: &castorev1pb.GetDirectoryRequest_Digest{ + Digest: rootDirectoryDigest, + }, + Recursive: true, + }) + if err != nil { + return fmt.Errorf("unable to query directory stream: %w", err) + } + + // For now, we just stream all of these locally and put them into a hashmap, + // which is used in the lookup function below. + for { + directory, err := directoryStream.Recv() + if err != nil { + if err == io.EOF { + break + } + return fmt.Errorf("unable to receive from directory stream: %w", err) + } + + // calculate directory digest + // TODO: do we need to do any more validation? + directoryDgst, err := directory.Digest() + if err != nil { + return fmt.Errorf("unable to calculate directory digest: %w", err) + } + + log.WithField("directory", base64.StdEncoding.EncodeToString(directoryDgst)).Debug("received directory node") + + directories[hex.EncodeToString(directoryDgst)] = directory + } + + } + + // render the NAR file + err := storev1pb.Export( + w, + rootNode, + func(directoryDigest []byte) (*castorev1pb.Directory, error) { + log.WithField("directory", base64.StdEncoding.EncodeToString(directoryDigest)).Debug("Get directory") + directoryRefStr := hex.EncodeToString(directoryDigest) + directory, found := directories[directoryRefStr] + if !found { + return nil, fmt.Errorf( + "directory with hash %v does not exist: %w", + directoryDigest, + fs.ErrNotExist, + ) + } + + return directory, nil + }, + func(blobDigest []byte) (io.ReadCloser, error) { + log.WithField("blob", base64.StdEncoding.EncodeToString(blobDigest)).Debug("Get blob") + resp, err := blobServiceClient.Read(ctx, &castorev1pb.ReadBlobRequest{ + Digest: blobDigest, + }) + if err != nil { + return nil, fmt.Errorf("unable to get blob: %w", err) + } + + // set up a pipe, let a goroutine write, return the reader. + pR, pW := io.Pipe() + + go func() { + for { + chunk, err := resp.Recv() + if errors.Is(err, io.EOF) { + break + } + if err != nil { + pW.CloseWithError(fmt.Errorf("receiving chunk: %w", err)) + return + } + + // write the received chunk to the writer part of the pipe + if _, err := io.Copy(pW, bytes.NewReader(chunk.GetData())); err != nil { + log.WithError(err).Error("writing chunk to pipe") + pW.CloseWithError(fmt.Errorf("writing chunk to pipe: %w", err)) + return + } + } + pW.Close() + + }() + + return io.NopCloser(pR), nil + }, + ) + if err != nil { + return fmt.Errorf("unable to export nar: %w", err) + } + return nil +} + +func registerNarGet(s *Server) { + // produce a handler for rendering NAR files. + genNarHandler := func(isHead bool) func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + + ctx := r.Context() + + // parse the narhash sent in the request URL + narHash, err := parseNarHashFromUrl(chi.URLParamFromCtx(ctx, "narhash")) + if err != nil { + log.WithError(err).WithField("url", r.URL).Error("unable to decode nar hash from url") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to decode nar hash from url")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + log := log.WithField("narhash_url", narHash.SRIString()) + + // TODO: inline more of that function here? + err = renderNar(ctx, log, s.directoryServiceClient, s.blobServiceClient, &s.narDbMu, s.narDb, w, narHash, isHead) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + w.WriteHeader(http.StatusNotFound) + } else { + log.WithError(err).Warn("unable to render nar") + w.WriteHeader(http.StatusInternalServerError) + } + } + + } + } + + s.handler.Head(narUrl, genNarHandler(true)) + s.handler.Get(narUrl, genNarHandler(false)) +} diff --git a/tvix/nar-bridge-go/pkg/http/nar_put.go b/tvix/nar-bridge-go/pkg/http/nar_put.go new file mode 100644 index 000000000000..96bdd38b709d --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/nar_put.go @@ -0,0 +1,141 @@ +package http + +import ( + "bufio" + "bytes" + "fmt" + "net/http" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + "code.tvl.fyi/tvix/nar-bridge-go/pkg/importer" + "github.com/go-chi/chi/v5" + mh "github.com/multiformats/go-multihash/core" + nixhash "github.com/nix-community/go-nix/pkg/hash" + "github.com/sirupsen/logrus" + log "github.com/sirupsen/logrus" +) + +func registerNarPut(s *Server) { + s.handler.Put(narUrl, func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + + ctx := r.Context() + + // parse the narhash sent in the request URL + narHashFromUrl, err := parseNarHashFromUrl(chi.URLParamFromCtx(ctx, "narhash")) + if err != nil { + log.WithError(err).WithField("url", r.URL).Error("unable to decode nar hash from url") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to decode nar hash from url")) + if err != nil { + log.WithError(err).Error("unable to write error message to client") + } + + return + } + + log := log.WithField("narhash_url", narHashFromUrl.SRIString()) + + directoriesUploader := importer.NewDirectoriesUploader(ctx, s.directoryServiceClient) + defer directoriesUploader.Done() //nolint:errcheck + + rootNode, narSize, narSha256, err := importer.Import( + ctx, + // buffer the body by 10MiB + bufio.NewReaderSize(r.Body, 10*1024*1024), + importer.GenBlobUploaderCb(ctx, s.blobServiceClient), + func(directory *castorev1pb.Directory) ([]byte, error) { + return directoriesUploader.Put(directory) + }, + ) + + if err != nil { + log.Errorf("error during NAR import: %v", err) + w.WriteHeader(http.StatusInternalServerError) + _, err := w.Write([]byte(fmt.Sprintf("error during NAR import: %v", err))) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + log.Debug("closing the stream") + + // Close the directories uploader + directoriesPutResponse, err := directoriesUploader.Done() + if err != nil { + log.WithError(err).Error("error during directory upload") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("error during directory upload")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + // If we uploaded directories (so directoriesPutResponse doesn't return null), + // the RootDigest field in directoriesPutResponse should match the digest + // returned in the PathInfo struct returned by the `Import` call. + // This check ensures the server-side came up with the same root hash. + + if directoriesPutResponse != nil { + rootDigestPathInfo := rootNode.GetDirectory().GetDigest() + rootDigestDirectoriesPutResponse := directoriesPutResponse.GetRootDigest() + + log := log.WithFields(logrus.Fields{ + "root_digest_pathinfo": rootDigestPathInfo, + "root_digest_directories_put_resp": rootDigestDirectoriesPutResponse, + }) + if !bytes.Equal(rootDigestPathInfo, rootDigestDirectoriesPutResponse) { + log.Errorf("returned root digest doesn't match what's calculated") + + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("error in root digest calculation")) + if err != nil { + log.WithError(err).Error("unable to write error message to client") + } + + return + } + } + + // Compare the nar hash specified in the URL with the one that has been + // calculated while processing the NAR file. + narHash, err := nixhash.FromHashTypeAndDigest(mh.SHA2_256, narSha256) + if err != nil { + panic("must parse nixbase32") + } + + if !bytes.Equal(narHashFromUrl.Digest(), narHash.Digest()) { + log := log.WithFields(logrus.Fields{ + "narhash_received_sha256": narHash.SRIString(), + "narsize": narSize, + }) + log.Error("received bytes don't match narhash from URL") + + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("received bytes don't match narHash specified in URL")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + // Insert the partial pathinfo structs into our lookup map, + // so requesting the NAR file will be possible. + // The same might exist already, but it'll have the same contents (so + // replacing will be a no-op), except maybe the root node Name field value, which + // is safe to ignore (as not part of the NAR). + s.narDbMu.Lock() + s.narDb[narHash.SRIString()] = &narData{ + rootNode: rootNode, + narSize: narSize, + } + s.narDbMu.Unlock() + + // Done! + }) + +} diff --git a/tvix/nar-bridge-go/pkg/http/narinfo.go b/tvix/nar-bridge-go/pkg/http/narinfo.go new file mode 100644 index 000000000000..e5b99a9505f1 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/narinfo.go @@ -0,0 +1,51 @@ +package http + +import ( + "fmt" + + storev1pb "code.tvl.fyi/tvix/store-go" + mh "github.com/multiformats/go-multihash/core" + nixhash "github.com/nix-community/go-nix/pkg/hash" + + "github.com/nix-community/go-nix/pkg/narinfo" + "github.com/nix-community/go-nix/pkg/narinfo/signature" + "github.com/nix-community/go-nix/pkg/nixbase32" +) + +// ToNixNarInfo converts the PathInfo to a narinfo.NarInfo. +func ToNixNarInfo(p *storev1pb.PathInfo) (*narinfo.NarInfo, error) { + // ensure the PathInfo is valid, and extract the StorePath from the node in + // there. + storePath, err := p.Validate() + if err != nil { + return nil, fmt.Errorf("failed to validate PathInfo: %w", err) + } + + // convert the signatures from storev1pb signatures to narinfo signatures + narinfoSignatures := make([]signature.Signature, len(p.GetNarinfo().GetSignatures())) + for i, pathInfoSignature := range p.GetNarinfo().GetSignatures() { + narinfoSignatures[i] = signature.Signature{ + Name: pathInfoSignature.GetName(), + Data: pathInfoSignature.GetData(), + } + } + + // produce nixhash for the narsha256. + narHash, err := nixhash.FromHashTypeAndDigest( + mh.SHA2_256, + p.GetNarinfo().GetNarSha256(), + ) + if err != nil { + return nil, fmt.Errorf("invalid narsha256: %w", err) + } + + return &narinfo.NarInfo{ + StorePath: storePath.Absolute(), + URL: "nar/" + nixbase32.EncodeToString(narHash.Digest()) + ".nar", + Compression: "none", + NarHash: narHash, + NarSize: uint64(p.GetNarinfo().GetNarSize()), + References: p.GetNarinfo().GetReferenceNames(), + Signatures: narinfoSignatures, + }, nil +} diff --git a/tvix/nar-bridge-go/pkg/http/narinfo_get.go b/tvix/nar-bridge-go/pkg/http/narinfo_get.go new file mode 100644 index 000000000000..d43cb58078da --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/narinfo_get.go @@ -0,0 +1,137 @@ +package http + +import ( + "context" + "encoding/base64" + "errors" + "fmt" + "io" + "io/fs" + "net/http" + "strings" + "sync" + + storev1pb "code.tvl.fyi/tvix/store-go" + "github.com/go-chi/chi/v5" + nixhash "github.com/nix-community/go-nix/pkg/hash" + "github.com/nix-community/go-nix/pkg/nixbase32" + log "github.com/sirupsen/logrus" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// renderNarinfo writes narinfo contents to a passed io.Writer, or a returns a +// (wrapped) io.ErrNoExist error if something doesn't exist. +// if headOnly is set to true, only the existence is checked, but no content is +// actually written. +func renderNarinfo( + ctx context.Context, + log *log.Entry, + pathInfoServiceClient storev1pb.PathInfoServiceClient, + narHashToPathInfoMu *sync.Mutex, + narHashToPathInfo map[string]*narData, + outputHash []byte, + w io.Writer, + headOnly bool, +) error { + pathInfo, err := pathInfoServiceClient.Get(ctx, &storev1pb.GetPathInfoRequest{ + ByWhat: &storev1pb.GetPathInfoRequest_ByOutputHash{ + ByOutputHash: outputHash, + }, + }) + if err != nil { + st, ok := status.FromError(err) + if ok { + if st.Code() == codes.NotFound { + return fmt.Errorf("output hash %v not found: %w", base64.StdEncoding.EncodeToString(outputHash), fs.ErrNotExist) + } + return fmt.Errorf("unable to get pathinfo, code %v: %w", st.Code(), err) + } + + return fmt.Errorf("unable to get pathinfo: %w", err) + } + + log = log.WithField("pathInfo", pathInfo) + + if _, err := pathInfo.Validate(); err != nil { + log.WithError(err).Error("unable to validate PathInfo") + + return fmt.Errorf("unable to validate PathInfo: %w", err) + } + + if pathInfo.GetNarinfo() == nil { + log.Error("PathInfo doesn't contain Narinfo field") + + return fmt.Errorf("PathInfo doesn't contain Narinfo field") + } + + // extract the NARHash. This must succeed, as Validate() did succeed. + narHash, err := nixhash.FromHashTypeAndDigest(0x12, pathInfo.GetNarinfo().GetNarSha256()) + if err != nil { + panic("must parse NarHash") + } + + // add things to the lookup table, in case the same process didn't handle the NAR hash yet. + narHashToPathInfoMu.Lock() + narHashToPathInfo[narHash.SRIString()] = &narData{ + rootNode: pathInfo.GetNode(), + narSize: pathInfo.GetNarinfo().GetNarSize(), + } + narHashToPathInfoMu.Unlock() + + if headOnly { + return nil + } + + // convert the PathInfo to NARInfo. + narInfo, err := ToNixNarInfo(pathInfo) + + // Write it out to the client. + _, err = io.Copy(w, strings.NewReader(narInfo.String())) + if err != nil { + return fmt.Errorf("unable to write narinfo to client: %w", err) + } + + return nil +} + +func registerNarinfoGet(s *Server) { + // GET/HEAD $outHash.narinfo looks up the PathInfo from the tvix-store, + // and, if it's a GET request, render a .narinfo file to the client. + // In both cases it will keep the PathInfo in the lookup map, + // so a subsequent GET/HEAD /nar/ $narhash.nar request can find it. + genNarinfoHandler := func(isHead bool) func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + + ctx := r.Context() + log := log.WithField("outputhash", chi.URLParamFromCtx(ctx, "outputhash")) + + // parse the output hash sent in the request URL + outputHash, err := nixbase32.DecodeString(chi.URLParamFromCtx(ctx, "outputhash")) + if err != nil { + log.WithError(err).Error("unable to decode output hash from url") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to decode output hash from url")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + err = renderNarinfo(ctx, log, s.pathInfoServiceClient, &s.narDbMu, s.narDb, outputHash, w, isHead) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + w.WriteHeader(http.StatusNotFound) + } else { + log.WithError(err).Warn("unable to render narinfo") + w.WriteHeader(http.StatusInternalServerError) + } + } + } + } + + s.handler.Get("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", genNarinfoHandler(false)) + s.handler.Head("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", genNarinfoHandler(true)) +} diff --git a/tvix/nar-bridge-go/pkg/http/narinfo_put.go b/tvix/nar-bridge-go/pkg/http/narinfo_put.go new file mode 100644 index 000000000000..0e2ae989c039 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/narinfo_put.go @@ -0,0 +1,103 @@ +package http + +import ( + "net/http" + + "code.tvl.fyi/tvix/nar-bridge-go/pkg/importer" + "github.com/go-chi/chi/v5" + "github.com/nix-community/go-nix/pkg/narinfo" + "github.com/nix-community/go-nix/pkg/nixbase32" + "github.com/sirupsen/logrus" + log "github.com/sirupsen/logrus" +) + +func registerNarinfoPut(s *Server) { + s.handler.Put("/{outputhash:^["+nixbase32.Alphabet+"]{32}}.narinfo", func(w http.ResponseWriter, r *http.Request) { + defer r.Body.Close() + + ctx := r.Context() + log := log.WithField("outputhash", chi.URLParamFromCtx(ctx, "outputhash")) + + // TODO: decide on merging behaviour. + // Maybe it's fine to add if contents are the same, but more sigs can be added? + // Right now, just replace a .narinfo for a path that already exists. + + // read and parse the .narinfo file + narInfo, err := narinfo.Parse(r.Body) + if err != nil { + log.WithError(err).Error("unable to parse narinfo") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to parse narinfo")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + log = log.WithFields(logrus.Fields{ + "narhash": narInfo.NarHash.SRIString(), + "output_path": narInfo.StorePath, + }) + + // look up the narHash in our temporary map + s.narDbMu.Lock() + narData, found := s.narDb[narInfo.NarHash.SRIString()] + s.narDbMu.Unlock() + if !found { + log.Error("unable to find referred NAR") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to find referred NAR")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + rootNode := narData.rootNode + + // compare fields with what we computed while receiving the NAR file + + // NarSize needs to match + if narData.narSize != narInfo.NarSize { + log.Error("narsize mismatch") + w.WriteHeader(http.StatusBadRequest) + _, err := w.Write([]byte("unable to parse narinfo")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + pathInfo, err := importer.GenPathInfo(rootNode, narInfo) + if err != nil { + log.WithError(err).Error("unable to generate PathInfo") + + w.WriteHeader(http.StatusInternalServerError) + _, err := w.Write([]byte("unable to generate PathInfo")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + log.WithField("pathInfo", pathInfo).Debug("inserted new pathInfo") + + receivedPathInfo, err := s.pathInfoServiceClient.Put(ctx, pathInfo) + if err != nil { + log.WithError(err).Error("unable to upload pathinfo to service") + w.WriteHeader(http.StatusInternalServerError) + _, err := w.Write([]byte("unable to upload pathinfo to server")) + if err != nil { + log.WithError(err).Errorf("unable to write error message to client") + } + + return + } + + log.WithField("pathInfo", receivedPathInfo).Debug("got back PathInfo") + }) +} diff --git a/tvix/nar-bridge-go/pkg/http/server.go b/tvix/nar-bridge-go/pkg/http/server.go new file mode 100644 index 000000000000..fbcb20be18b7 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/server.go @@ -0,0 +1,119 @@ +package http + +import ( + "context" + "fmt" + "net" + "net/http" + "strings" + "sync" + "time" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + storev1pb "code.tvl.fyi/tvix/store-go" + "github.com/go-chi/chi/middleware" + "github.com/go-chi/chi/v5" + log "github.com/sirupsen/logrus" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" +) + +type Server struct { + srv *http.Server + handler chi.Router + + directoryServiceClient castorev1pb.DirectoryServiceClient + blobServiceClient castorev1pb.BlobServiceClient + pathInfoServiceClient storev1pb.PathInfoServiceClient + + // When uploading NAR files to a HTTP binary cache, the .nar + // files are uploaded before the .narinfo files. + // We need *both* to be able to fully construct a PathInfo object. + // Keep a in-memory map of narhash(es) (in SRI) to (unnamed) root node and nar + // size. + // This is necessary until we can ask a PathInfoService for a node with a given + // narSha256. + narDbMu sync.Mutex + narDb map[string]*narData +} + +type narData struct { + rootNode *castorev1pb.Node + narSize uint64 +} + +func New( + directoryServiceClient castorev1pb.DirectoryServiceClient, + blobServiceClient castorev1pb.BlobServiceClient, + pathInfoServiceClient storev1pb.PathInfoServiceClient, + enableAccessLog bool, + priority int, +) *Server { + r := chi.NewRouter() + r.Use(func(h http.Handler) http.Handler { + return otelhttp.NewHandler(h, "http.request") + }) + + if enableAccessLog { + r.Use(middleware.Logger) + } + + r.Get("/", func(w http.ResponseWriter, r *http.Request) { + _, err := w.Write([]byte("nar-bridge")) + if err != nil { + log.Errorf("Unable to write response: %v", err) + } + }) + + r.Get("/nix-cache-info", func(w http.ResponseWriter, r *http.Request) { + _, err := w.Write([]byte(fmt.Sprintf("StoreDir: /nix/store\nWantMassQuery: 1\nPriority: %d\n", priority))) + if err != nil { + log.Errorf("Unable to write response: %v", err) + } + }) + + s := &Server{ + handler: r, + directoryServiceClient: directoryServiceClient, + blobServiceClient: blobServiceClient, + pathInfoServiceClient: pathInfoServiceClient, + narDb: make(map[string]*narData), + } + + registerNarPut(s) + registerNarinfoPut(s) + + registerNarinfoGet(s) + registerNarGet(s) + + return s +} + +func (s *Server) Shutdown(ctx context.Context) error { + return s.srv.Shutdown(ctx) +} + +// ListenAndServer starts the webserver, and waits for it being closed or +// shutdown, after which it'll return ErrServerClosed. +func (s *Server) ListenAndServe(addr string) error { + s.srv = &http.Server{ + Handler: s.handler, + ReadTimeout: 500 * time.Second, + WriteTimeout: 500 * time.Second, + IdleTimeout: 500 * time.Second, + } + + var listener net.Listener + var err error + + // check addr. If it contains slashes, assume it's a unix domain socket. + if strings.Contains(addr, "/") { + listener, err = net.Listen("unix", addr) + } else { + listener, err = net.Listen("tcp", addr) + } + if err != nil { + return fmt.Errorf("unable to listen on %v: %w", addr, err) + } + + return s.srv.Serve(listener) +} diff --git a/tvix/nar-bridge-go/pkg/http/util.go b/tvix/nar-bridge-go/pkg/http/util.go new file mode 100644 index 000000000000..60febea1f430 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/http/util.go @@ -0,0 +1,24 @@ +package http + +import ( + "fmt" + nixhash "github.com/nix-community/go-nix/pkg/hash" +) + +// parseNarHashFromUrl parses a nixbase32 string representing a sha256 NarHash +// and returns a nixhash.Hash when it was able to parse, or an error. +func parseNarHashFromUrl(narHashFromUrl string) (*nixhash.Hash, error) { + // peek at the length. If it's 52 characters, assume sha256, + // if it's something else, this is an error. + l := len(narHashFromUrl) + if l != 52 { + return nil, fmt.Errorf("invalid length of narHash: %v", l) + } + + nixHash, err := nixhash.ParseNixBase32("sha256:" + narHashFromUrl) + if err != nil { + return nil, fmt.Errorf("unable to parse nixbase32 hash: %w", err) + } + + return nixHash, nil +} diff --git a/tvix/nar-bridge-go/pkg/importer/blob_upload.go b/tvix/nar-bridge-go/pkg/importer/blob_upload.go new file mode 100644 index 000000000000..c1255dd3ad5d --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/blob_upload.go @@ -0,0 +1,71 @@ +package importer + +import ( + "bufio" + "context" + "encoding/base64" + "errors" + "fmt" + "io" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + log "github.com/sirupsen/logrus" +) + +// the size of individual BlobChunk we send when uploading to BlobService. +const chunkSize = 1024 * 1024 + +// this produces a callback function that can be used as blobCb for the +// importer.Import function call. +func GenBlobUploaderCb(ctx context.Context, blobServiceClient castorev1pb.BlobServiceClient) func(io.Reader) ([]byte, error) { + return func(blobReader io.Reader) ([]byte, error) { + // Ensure the blobReader is buffered to at least the chunk size. + blobReader = bufio.NewReaderSize(blobReader, chunkSize) + + putter, err := blobServiceClient.Put(ctx) + if err != nil { + // return error to the importer + return nil, fmt.Errorf("error from blob service: %w", err) + } + + blobSize := 0 + chunk := make([]byte, chunkSize) + + for { + n, err := blobReader.Read(chunk) + if err != nil && !errors.Is(err, io.EOF) { + return nil, fmt.Errorf("unable to read from blobreader: %w", err) + } + + if n != 0 { + log.WithField("chunk_size", n).Debug("sending chunk") + blobSize += n + + // send the blob chunk to the server. The err is only valid in the inner scope + if err := putter.Send(&castorev1pb.BlobChunk{ + Data: chunk[:n], + }); err != nil { + return nil, fmt.Errorf("sending blob chunk: %w", err) + } + } + + // if our read from blobReader returned an EOF, we're done reading + if errors.Is(err, io.EOF) { + break + } + + } + + resp, err := putter.CloseAndRecv() + if err != nil { + return nil, fmt.Errorf("close blob putter: %w", err) + } + + log.WithFields(log.Fields{ + "blob_digest": base64.StdEncoding.EncodeToString(resp.GetDigest()), + "blob_size": blobSize, + }).Debug("uploaded blob") + + return resp.GetDigest(), nil + } +} diff --git a/tvix/nar-bridge-go/pkg/importer/counting_writer.go b/tvix/nar-bridge-go/pkg/importer/counting_writer.go new file mode 100644 index 000000000000..d003a4b11bfd --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/counting_writer.go @@ -0,0 +1,21 @@ +package importer + +import ( + "io" +) + +// CountingWriter implements io.Writer. +var _ io.Writer = &CountingWriter{} + +type CountingWriter struct { + bytesWritten uint64 +} + +func (cw *CountingWriter) Write(p []byte) (n int, err error) { + cw.bytesWritten += uint64(len(p)) + return len(p), nil +} + +func (cw *CountingWriter) BytesWritten() uint64 { + return cw.bytesWritten +} diff --git a/tvix/nar-bridge-go/pkg/importer/directory_upload.go b/tvix/nar-bridge-go/pkg/importer/directory_upload.go new file mode 100644 index 000000000000..117f442fa54f --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/directory_upload.go @@ -0,0 +1,88 @@ +package importer + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + log "github.com/sirupsen/logrus" +) + +// DirectoriesUploader opens a Put stream when it receives the first Put() call, +// and then uses the opened stream for subsequent Put() calls. +// When the uploading is finished, a call to Done() will close the stream and +// return the root digest returned from the directoryServiceClient. +type DirectoriesUploader struct { + ctx context.Context + directoryServiceClient castorev1pb.DirectoryServiceClient + directoryServicePutStream castorev1pb.DirectoryService_PutClient + lastDirectoryDigest []byte +} + +func NewDirectoriesUploader(ctx context.Context, directoryServiceClient castorev1pb.DirectoryServiceClient) *DirectoriesUploader { + return &DirectoriesUploader{ + ctx: ctx, + directoryServiceClient: directoryServiceClient, + directoryServicePutStream: nil, + } +} + +func (du *DirectoriesUploader) Put(directory *castorev1pb.Directory) ([]byte, error) { + directoryDigest, err := directory.Digest() + if err != nil { + return nil, fmt.Errorf("failed calculating directory digest: %w", err) + } + + // Send the directory to the directory service + // If the stream hasn't been initialized yet, do it first + if du.directoryServicePutStream == nil { + directoryServicePutStream, err := du.directoryServiceClient.Put(du.ctx) + if err != nil { + return nil, fmt.Errorf("unable to initialize directory service put stream: %v", err) + } + du.directoryServicePutStream = directoryServicePutStream + } + + // send the directory out + err = du.directoryServicePutStream.Send(directory) + if err != nil { + return nil, fmt.Errorf("error sending directory: %w", err) + } + log.WithField("digest", base64.StdEncoding.EncodeToString(directoryDigest)).Debug("uploaded directory") + + // update lastDirectoryDigest + du.lastDirectoryDigest = directoryDigest + + return directoryDigest, nil +} + +// Done closes the stream and returns the response. +// It returns null if closed for a second time. +func (du *DirectoriesUploader) Done() (*castorev1pb.PutDirectoryResponse, error) { + // only close once, and only if we opened. + if du.directoryServicePutStream == nil { + return nil, nil + } + + putDirectoryResponse, err := du.directoryServicePutStream.CloseAndRecv() + if err != nil { + return nil, fmt.Errorf("unable to close directory service put stream: %v", err) + } + + // ensure the response contains the same digest as the one we have in lastDirectoryDigest. + // Otherwise, the backend came up with another digest than we, in which we return an error. + if !bytes.Equal(du.lastDirectoryDigest, putDirectoryResponse.RootDigest) { + return nil, fmt.Errorf( + "backend calculated different root digest as we, expected %s, actual %s", + base64.StdEncoding.EncodeToString(du.lastDirectoryDigest), + base64.StdEncoding.EncodeToString(putDirectoryResponse.RootDigest), + ) + } + + // clear directoryServicePutStream. + du.directoryServicePutStream = nil + + return putDirectoryResponse, nil +} diff --git a/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go b/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go new file mode 100644 index 000000000000..bdc298a9a399 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/gen_pathinfo.go @@ -0,0 +1,62 @@ +package importer + +import ( + castorev1pb "code.tvl.fyi/tvix/castore-go" + storev1pb "code.tvl.fyi/tvix/store-go" + "fmt" + "github.com/nix-community/go-nix/pkg/narinfo" + "github.com/nix-community/go-nix/pkg/storepath" +) + +// GenPathInfo takes a rootNode and narInfo and assembles a PathInfo. +// The rootNode is renamed to match the StorePath in the narInfo. +func GenPathInfo(rootNode *castorev1pb.Node, narInfo *narinfo.NarInfo) (*storev1pb.PathInfo, error) { + // parse the storePath from the .narinfo + storePath, err := storepath.FromAbsolutePath(narInfo.StorePath) + if err != nil { + return nil, fmt.Errorf("unable to parse StorePath: %w", err) + } + + // construct the references, by parsing ReferenceNames and extracting the digest + references := make([][]byte, len(narInfo.References)) + for i, referenceStr := range narInfo.References { + // parse reference as store path + referenceStorePath, err := storepath.FromString(referenceStr) + if err != nil { + return nil, fmt.Errorf("unable to parse reference %s as storepath: %w", referenceStr, err) + } + references[i] = referenceStorePath.Digest + } + + // construct the narInfo.Signatures[*] from pathInfo.Narinfo.Signatures[*] + narinfoSignatures := make([]*storev1pb.NARInfo_Signature, len(narInfo.Signatures)) + for i, narinfoSig := range narInfo.Signatures { + narinfoSignatures[i] = &storev1pb.NARInfo_Signature{ + Name: narinfoSig.Name, + Data: narinfoSig.Data, + } + } + + // assemble the PathInfo. + pathInfo := &storev1pb.PathInfo{ + // embed a new root node with the name set to the store path basename. + Node: castorev1pb.RenamedNode(rootNode, storePath.String()), + References: references, + Narinfo: &storev1pb.NARInfo{ + NarSize: narInfo.NarSize, + NarSha256: narInfo.FileHash.Digest(), + Signatures: narinfoSignatures, + ReferenceNames: narInfo.References, + }, + } + + // run Validate on the PathInfo, more as an additional sanity check our code is sound, + // to make sure we populated everything properly, before returning it. + // Fail hard if we fail validation, this is a code error. + if _, err = pathInfo.Validate(); err != nil { + panic(fmt.Sprintf("PathInfo failed validation: %v", err)) + } + + return pathInfo, nil + +} diff --git a/tvix/nar-bridge-go/pkg/importer/importer.go b/tvix/nar-bridge-go/pkg/importer/importer.go new file mode 100644 index 000000000000..fce6c5f293da --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/importer.go @@ -0,0 +1,303 @@ +package importer + +import ( + "bytes" + "context" + "crypto/sha256" + "errors" + "fmt" + "io" + "path" + "strings" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + "github.com/nix-community/go-nix/pkg/nar" + "golang.org/x/sync/errgroup" + "lukechampine.com/blake3" +) + +const ( + // asyncUploadThreshold controls when a file is buffered into memory and uploaded + // asynchronously. Files must be smaller than the threshold to be uploaded asynchronously. + asyncUploadThreshold = 1024 * 1024 // 1 MiB + // maxAsyncUploadBufferBytes is the maximum number of async blob uploads allowed to be + // running concurrently at any given time for a simple import operation. + maxConcurrentAsyncUploads = 128 +) + +// An item on the directories stack +type stackItem struct { + path string + directory *castorev1pb.Directory +} + +// Import reads a NAR from a reader, and returns a the root node, +// NAR size and NAR sha256 digest. +func Import( + // a context, to support cancellation + ctx context.Context, + // The reader the data is read from + r io.Reader, + // callback function called with each regular file content + blobCb func(fileReader io.Reader) ([]byte, error), + // callback function called with each finalized directory node + directoryCb func(directory *castorev1pb.Directory) ([]byte, error), +) (*castorev1pb.Node, uint64, []byte, error) { + // We need to wrap the underlying reader a bit. + // - we want to keep track of the number of bytes read in total + // - we calculate the sha256 digest over all data read + // Express these two things in a MultiWriter, and give the NAR reader a + // TeeReader that writes to it. + narCountW := &CountingWriter{} + sha256W := sha256.New() + multiW := io.MultiWriter(narCountW, sha256W) + narReader, err := nar.NewReader(io.TeeReader(r, multiW)) + if err != nil { + return nil, 0, nil, fmt.Errorf("failed to instantiate nar reader: %w", err) + } + defer narReader.Close() + + // If we store a symlink or regular file at the root, these are not nil. + // If they are nil, we instead have a stackDirectory. + var rootSymlink *castorev1pb.SymlinkNode + var rootFile *castorev1pb.FileNode + var stackDirectory *castorev1pb.Directory + + // Keep track of all asynch blob uploads so we can make sure they all succeed + // before returning. + var asyncBlobWg errgroup.Group + asyncBlobWg.SetLimit(maxConcurrentAsyncUploads) + + var stack = []stackItem{} + + // popFromStack is used when we transition to a different directory or + // drain the stack when we reach the end of the NAR. + // It adds the popped element to the element underneath if any, + // and passes it to the directoryCb callback. + // This function may only be called if the stack is not already empty. + popFromStack := func() error { + // Keep the top item, and "resize" the stack slice. + // This will only make the last element unaccessible, but chances are high + // we're re-using that space anyways. + toPop := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + // call the directoryCb + directoryDigest, err := directoryCb(toPop.directory) + if err != nil { + return fmt.Errorf("failed calling directoryCb: %w", err) + } + + // if there's still a parent left on the stack, refer to it from there. + if len(stack) > 0 { + topOfStack := stack[len(stack)-1].directory + topOfStack.Directories = append(topOfStack.Directories, &castorev1pb.DirectoryNode{ + Name: []byte(path.Base(toPop.path)), + Digest: directoryDigest, + Size: toPop.directory.Size(), + }) + } + // Keep track that we have encounter at least one directory + stackDirectory = toPop.directory + return nil + } + + getBasename := func(p string) string { + // extract the basename. In case of "/", replace with empty string. + basename := path.Base(p) + if basename == "/" { + basename = "" + } + return basename + } + + for { + select { + case <-ctx.Done(): + return nil, 0, nil, ctx.Err() + default: + // call narReader.Next() to get the next element + hdr, err := narReader.Next() + + // If this returns an error, it's either EOF (when we're done reading from the NAR), + // or another error. + if err != nil { + // if this returns no EOF, bail out + if !errors.Is(err, io.EOF) { + return nil, 0, nil, fmt.Errorf("failed getting next nar element: %w", err) + } + + // The NAR has been read all the way to the end… + // Make sure we close the nar reader, which might read some final trailers. + if err := narReader.Close(); err != nil { + return nil, 0, nil, fmt.Errorf("unable to close nar reader: %w", err) + } + + // Check the stack. While it's not empty, we need to pop things off the stack. + for len(stack) > 0 { + err := popFromStack() + if err != nil { + return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err) + } + } + + // Wait for any pending blob uploads. + err := asyncBlobWg.Wait() + if err != nil { + return nil, 0, nil, fmt.Errorf("async blob upload: %w", err) + } + + // Stack is empty. + // Now either root{File,Symlink,Directory} is not nil, + // and we can return the root node. + narSize := narCountW.BytesWritten() + narSha256 := sha256W.Sum(nil) + + if rootFile != nil { + return &castorev1pb.Node{ + Node: &castorev1pb.Node_File{ + File: rootFile, + }, + }, narSize, narSha256, nil + } else if rootSymlink != nil { + return &castorev1pb.Node{ + Node: &castorev1pb.Node_Symlink{ + Symlink: rootSymlink, + }, + }, narSize, narSha256, nil + } else if stackDirectory != nil { + // calculate directory digest (i.e. after we received all its contents) + dgst, err := stackDirectory.Digest() + if err != nil { + return nil, 0, nil, fmt.Errorf("unable to calculate root directory digest: %w", err) + } + + return &castorev1pb.Node{ + Node: &castorev1pb.Node_Directory{ + Directory: &castorev1pb.DirectoryNode{ + Name: []byte{}, + Digest: dgst, + Size: stackDirectory.Size(), + }, + }, + }, narSize, narSha256, nil + } else { + return nil, 0, nil, fmt.Errorf("no root set") + } + } + + // Check for valid path transitions, pop from stack if needed + // The nar reader already gives us some guarantees about ordering and illegal transitions, + // So we really only need to check if the top-of-stack path is a prefix of the path, + // and if it's not, pop from the stack. We do this repeatedly until the top of the stack is + // the subdirectory the new entry is in, or we hit the root directory. + + // We don't need to worry about the root node case, because we can only finish the root "/" + // If we're at the end of the NAR reader (covered by the EOF check) + for len(stack) > 1 && !strings.HasPrefix(hdr.Path, stack[len(stack)-1].path+"/") { + err := popFromStack() + if err != nil { + return nil, 0, nil, fmt.Errorf("unable to pop from stack: %w", err) + } + } + + if hdr.Type == nar.TypeSymlink { + symlinkNode := &castorev1pb.SymlinkNode{ + Name: []byte(getBasename(hdr.Path)), + Target: []byte(hdr.LinkTarget), + } + if len(stack) > 0 { + topOfStack := stack[len(stack)-1].directory + topOfStack.Symlinks = append(topOfStack.Symlinks, symlinkNode) + } else { + rootSymlink = symlinkNode + } + + } + if hdr.Type == nar.TypeRegular { + uploadBlob := func(r io.Reader) ([]byte, error) { + // wrap reader with a reader counting the number of bytes read + blobCountW := &CountingWriter{} + blobReader := io.TeeReader(r, blobCountW) + + blobDigest, err := blobCb(blobReader) + if err != nil { + return nil, fmt.Errorf("failure from blobCb: %w", err) + } + + // ensure blobCb did read all the way to the end. + // If it didn't, the blobCb function is wrong and we should bail out. + if blobCountW.BytesWritten() != uint64(hdr.Size) { + return nil, fmt.Errorf("blobCb did not read all: %d/%d bytes", blobCountW.BytesWritten(), hdr.Size) + } + + return blobDigest, nil + } + + h := blake3.New(32, nil) + blobReader := io.TeeReader(narReader, io.MultiWriter(h)) + var blobDigest []byte + + // If this file is small enough, read it off the wire immediately and + // upload to the blob service asynchronously. This helps reduce the + // RTT on blob uploads for NARs with many small files. + doAsync := hdr.Size < asyncUploadThreshold + if doAsync { + blobContents, err := io.ReadAll(blobReader) + if err != nil { + return nil, 0, nil, fmt.Errorf("read blob: %w", err) + } + + blobDigest = h.Sum(nil) + + asyncBlobWg.Go(func() error { + blobDigestFromCb, err := uploadBlob(bytes.NewReader(blobContents)) + if err != nil { + return err + } + + if !bytes.Equal(blobDigest, blobDigestFromCb) { + return fmt.Errorf("unexpected digest (got %x, expected %x)", blobDigestFromCb, blobDigest) + } + + return nil + }) + } else { + blobDigestFromCb, err := uploadBlob(blobReader) + if err != nil { + return nil, 0, nil, fmt.Errorf("upload blob: %w", err) + } + + blobDigest = h.Sum(nil) + if !bytes.Equal(blobDigest, blobDigestFromCb) { + return nil, 0, nil, fmt.Errorf("unexpected digest (got %x, expected %x)", blobDigestFromCb, blobDigest) + } + } + + fileNode := &castorev1pb.FileNode{ + Name: []byte(getBasename(hdr.Path)), + Digest: blobDigest, + Size: uint64(hdr.Size), + Executable: hdr.Executable, + } + if len(stack) > 0 { + topOfStack := stack[len(stack)-1].directory + topOfStack.Files = append(topOfStack.Files, fileNode) + } else { + rootFile = fileNode + } + } + if hdr.Type == nar.TypeDirectory { + directory := &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + stack = append(stack, stackItem{ + directory: directory, + path: hdr.Path, + }) + } + } + } +} diff --git a/tvix/nar-bridge-go/pkg/importer/importer_test.go b/tvix/nar-bridge-go/pkg/importer/importer_test.go new file mode 100644 index 000000000000..313677084f71 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/importer_test.go @@ -0,0 +1,537 @@ +package importer_test + +import ( + "bytes" + "context" + "errors" + "io" + "os" + "testing" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + "code.tvl.fyi/tvix/nar-bridge-go/pkg/importer" + "github.com/stretchr/testify/require" +) + +func TestSymlink(t *testing.T) { + f, err := os.Open("../../testdata/symlink.nar") + require.NoError(t, err) + + rootNode, narSize, narSha256, err := importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + panic("no file contents expected!") + }, func(directory *castorev1pb.Directory) ([]byte, error) { + panic("no directories expected!") + }, + ) + require.NoError(t, err) + require.Equal(t, &castorev1pb.Node{ + Node: &castorev1pb.Node_Symlink{ + Symlink: &castorev1pb.SymlinkNode{ + Name: []byte(""), + Target: []byte("/nix/store/somewhereelse"), + }, + }, + }, rootNode) + require.Equal(t, []byte{ + 0x09, 0x7d, 0x39, 0x7e, 0x9b, 0x58, 0x26, 0x38, 0x4e, 0xaa, 0x16, 0xc4, 0x57, 0x71, 0x5d, 0x1c, 0x1a, 0x51, 0x67, 0x03, 0x13, 0xea, 0xd0, 0xf5, 0x85, 0x66, 0xe0, 0xb2, 0x32, 0x53, 0x9c, 0xf1, + }, narSha256) + require.Equal(t, uint64(136), narSize) +} + +func TestRegular(t *testing.T) { + f, err := os.Open("../../testdata/onebyteregular.nar") + require.NoError(t, err) + + rootNode, narSize, narSha256, err := importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + contents, err := io.ReadAll(blobReader) + require.NoError(t, err, "reading blobReader should not error") + require.Equal(t, []byte{0x01}, contents, "contents read from blobReader should match expectations") + return mustBlobDigest(bytes.NewBuffer(contents)), nil + }, func(directory *castorev1pb.Directory) ([]byte, error) { + panic("no directories expected!") + }, + ) + + // The blake3 digest of the 0x01 byte. + BLAKE3_DIGEST_0X01 := []byte{ + 0x48, 0xfc, 0x72, 0x1f, 0xbb, 0xc1, 0x72, 0xe0, 0x92, 0x5f, 0xa2, 0x7a, 0xf1, 0x67, 0x1d, + 0xe2, 0x25, 0xba, 0x92, 0x71, 0x34, 0x80, 0x29, 0x98, 0xb1, 0x0a, 0x15, 0x68, 0xa1, 0x88, + 0x65, 0x2b, + } + + require.NoError(t, err) + require.Equal(t, &castorev1pb.Node{ + Node: &castorev1pb.Node_File{ + File: &castorev1pb.FileNode{ + Name: []byte(""), + Digest: BLAKE3_DIGEST_0X01, + Size: 1, + Executable: false, + }, + }, + }, rootNode) + require.Equal(t, []byte{ + 0x73, 0x08, 0x50, 0xa8, 0x11, 0x25, 0x9d, 0xbf, 0x3a, 0x68, 0xdc, 0x2e, 0xe8, 0x7a, 0x79, 0xaa, 0x6c, 0xae, 0x9f, 0x71, 0x37, 0x5e, 0xdf, 0x39, 0x6f, 0x9d, 0x7a, 0x91, 0xfb, 0xe9, 0x13, 0x4d, + }, narSha256) + require.Equal(t, uint64(120), narSize) +} + +func TestEmptyDirectory(t *testing.T) { + f, err := os.Open("../../testdata/emptydirectory.nar") + require.NoError(t, err) + + expectedDirectory := &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + rootNode, narSize, narSha256, err := importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + panic("no file contents expected!") + }, func(directory *castorev1pb.Directory) ([]byte, error) { + requireProtoEq(t, expectedDirectory, directory) + return mustDirectoryDigest(directory), nil + }, + ) + require.NoError(t, err) + require.Equal(t, &castorev1pb.Node{ + Node: &castorev1pb.Node_Directory{ + Directory: &castorev1pb.DirectoryNode{ + Name: []byte(""), + Digest: mustDirectoryDigest(expectedDirectory), + Size: expectedDirectory.Size(), + }, + }, + }, rootNode) + require.Equal(t, []byte{ + 0xa5, 0x0a, 0x5a, 0xb6, 0xd9, 0x92, 0xf5, 0x59, 0x8e, 0xdd, 0x92, 0x10, 0x50, 0x59, 0xfa, 0xe9, 0xac, 0xfc, 0x19, 0x29, 0x81, 0xe0, 0x8b, 0xd8, 0x85, 0x34, 0xc2, 0x16, 0x7e, 0x92, 0x52, 0x6a, + }, narSha256) + require.Equal(t, uint64(96), narSize) +} + +func TestFull(t *testing.T) { + f, err := os.Open("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar") + require.NoError(t, err) + + expectedDirectoryPaths := []string{ + "/bin", + "/share/man/man1", + "/share/man/man5", + "/share/man/man8", + "/share/man", + "/share", + "/", + } + expectedDirectories := make(map[string]*castorev1pb.Directory, len(expectedDirectoryPaths)) + + // /bin is a leaf directory + expectedDirectories["/bin"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{ + { + Name: []byte("arp"), + Digest: []byte{ + 0xfb, 0xc4, 0x61, 0x4a, 0x29, 0x27, 0x11, 0xcb, 0xcc, 0xe4, 0x99, 0x81, 0x9c, 0xf0, 0xa9, 0x17, 0xf7, 0xd0, 0x91, 0xbe, 0xea, 0x08, 0xcb, 0x5b, 0xaa, 0x76, 0x76, 0xf5, 0xee, 0x4f, 0x82, 0xbb, + }, + Size: 55288, + Executable: true, + }, + { + Name: []byte("hostname"), + Digest: []byte{ + 0x9c, 0x6a, 0xe4, 0xb5, 0xe4, 0x6c, 0xb5, 0x67, 0x45, 0x0e, 0xaa, 0x2a, 0xd8, 0xdd, 0x9b, 0x38, 0xd7, 0xed, 0x01, 0x02, 0x84, 0xf7, 0x26, 0xe1, 0xc7, 0xf3, 0x1c, 0xeb, 0xaa, 0x8a, 0x01, 0x30, + }, + Size: 17704, + Executable: true, + }, + { + Name: []byte("ifconfig"), + Digest: []byte{ + 0x25, 0xbe, 0x3b, 0x1d, 0xf4, 0x1a, 0x45, 0x42, 0x79, 0x09, 0x2c, 0x2a, 0x83, 0xf0, 0x0b, 0xff, 0xe8, 0xc0, 0x9c, 0x26, 0x98, 0x70, 0x15, 0x4d, 0xa8, 0xca, 0x05, 0xfe, 0x92, 0x68, 0x35, 0x2e, + }, + Size: 72576, + Executable: true, + }, + { + Name: []byte("nameif"), + Digest: []byte{ + 0x8e, 0xaa, 0xc5, 0xdb, 0x71, 0x08, 0x8e, 0xe5, 0xe6, 0x30, 0x1f, 0x2c, 0x3a, 0xf2, 0x42, 0x39, 0x0c, 0x57, 0x15, 0xaf, 0x50, 0xaa, 0x1c, 0xdf, 0x84, 0x22, 0x08, 0x77, 0x03, 0x54, 0x62, 0xb1, + }, + Size: 18776, + Executable: true, + }, + { + Name: []byte("netstat"), + Digest: []byte{ + 0x13, 0x34, 0x7e, 0xdd, 0x2a, 0x9a, 0x17, 0x0b, 0x3f, 0xc7, 0x0a, 0xe4, 0x92, 0x89, 0x25, 0x9f, 0xaa, 0xb5, 0x05, 0x6b, 0x24, 0xa7, 0x91, 0xeb, 0xaf, 0xf9, 0xe9, 0x35, 0x56, 0xaa, 0x2f, 0xb2, + }, + Size: 131784, + Executable: true, + }, + { + Name: []byte("plipconfig"), + Digest: []byte{ + 0x19, 0x7c, 0x80, 0xdc, 0x81, 0xdc, 0xb4, 0xc0, 0x45, 0xe1, 0xf9, 0x76, 0x51, 0x4f, 0x50, 0xbf, 0xa4, 0x69, 0x51, 0x9a, 0xd4, 0xa9, 0xe7, 0xaa, 0xe7, 0x0d, 0x53, 0x32, 0xff, 0x28, 0x40, 0x60, + }, + Size: 13160, + Executable: true, + }, + { + Name: []byte("rarp"), + Digest: []byte{ + 0x08, 0x85, 0xb4, 0x85, 0x03, 0x2b, 0x3c, 0x7a, 0x3e, 0x24, 0x4c, 0xf8, 0xcc, 0x45, 0x01, 0x9e, 0x79, 0x43, 0x8c, 0x6f, 0x5e, 0x32, 0x46, 0x54, 0xb6, 0x68, 0x91, 0x8e, 0xa0, 0xcb, 0x6e, 0x0d, + }, + Size: 30384, + Executable: true, + }, + { + Name: []byte("route"), + Digest: []byte{ + 0x4d, 0x14, 0x20, 0x89, 0x9e, 0x76, 0xf4, 0xe2, 0x92, 0x53, 0xee, 0x9b, 0x78, 0x7d, 0x23, 0x80, 0x6c, 0xff, 0xe6, 0x33, 0xdc, 0x4a, 0x10, 0x29, 0x39, 0x02, 0xa0, 0x60, 0xff, 0xe2, 0xbb, 0xd7, + }, + Size: 61928, + Executable: true, + }, + { + Name: []byte("slattach"), + Digest: []byte{ + 0xfb, 0x25, 0xc3, 0x73, 0xb7, 0xb1, 0x0b, 0x25, 0xcd, 0x7b, 0x62, 0xf6, 0x71, 0x83, 0xfe, 0x36, 0x80, 0xf6, 0x48, 0xc3, 0xdb, 0xd8, 0x0c, 0xfe, 0xb8, 0xd3, 0xda, 0x32, 0x9b, 0x47, 0x4b, 0x05, + }, + Size: 35672, + Executable: true, + }, + }, + Symlinks: []*castorev1pb.SymlinkNode{ + { + Name: []byte("dnsdomainname"), + Target: []byte("hostname"), + }, + { + Name: []byte("domainname"), + Target: []byte("hostname"), + }, + { + Name: []byte("nisdomainname"), + Target: []byte("hostname"), + }, + { + Name: []byte("ypdomainname"), + Target: []byte("hostname"), + }, + }, + } + + // /share/man/man1 is a leaf directory. + // The parser traversed over /sbin, but only added it to / which is still on the stack. + expectedDirectories["/share/man/man1"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{ + { + Name: []byte("dnsdomainname.1.gz"), + Digest: []byte{ + 0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee, + }, + Size: 40, + Executable: false, + }, + { + Name: []byte("domainname.1.gz"), + Digest: []byte{ + 0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee, + }, + Size: 40, + Executable: false, + }, + { + Name: []byte("hostname.1.gz"), + Digest: []byte{ + 0xbf, 0x89, 0xe6, 0x28, 0x00, 0x24, 0x66, 0x79, 0x70, 0x04, 0x38, 0xd6, 0xdd, 0x9d, 0xf6, 0x0e, 0x0d, 0xee, 0x00, 0xf7, 0x64, 0x4f, 0x05, 0x08, 0x9d, 0xf0, 0x36, 0xde, 0x85, 0xf4, 0x75, 0xdb, + }, + Size: 1660, + Executable: false, + }, + { + Name: []byte("nisdomainname.1.gz"), + Digest: []byte{ + 0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee, + }, + Size: 40, + Executable: false, + }, + { + Name: []byte("ypdomainname.1.gz"), + Digest: []byte{ + 0x98, 0x8a, 0xbd, 0xfa, 0x64, 0xd5, 0xb9, 0x27, 0xfe, 0x37, 0x43, 0x56, 0xb3, 0x18, 0xc7, 0x2b, 0xcb, 0xe3, 0x17, 0x1c, 0x17, 0xf4, 0x17, 0xeb, 0x4a, 0xa4, 0x99, 0x64, 0x39, 0xca, 0x2d, 0xee, + }, + Size: 40, + Executable: false, + }, + }, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + // /share/man/man5 is a leaf directory + expectedDirectories["/share/man/man5"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{ + { + Name: []byte("ethers.5.gz"), + Digest: []byte{ + 0x42, 0x63, 0x8c, 0xc4, 0x18, 0x93, 0xcf, 0x60, 0xd6, 0xff, 0x43, 0xbc, 0x16, 0xb4, 0xfd, 0x22, 0xd2, 0xf2, 0x05, 0x0b, 0x52, 0xdc, 0x6a, 0x6b, 0xff, 0x34, 0xe2, 0x6a, 0x38, 0x3a, 0x07, 0xe3, + }, + Size: 563, + Executable: false, + }, + }, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + // /share/man/man8 is a leaf directory + expectedDirectories["/share/man/man8"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{}, + Files: []*castorev1pb.FileNode{ + { + Name: []byte("arp.8.gz"), + Digest: []byte{ + 0xf5, 0x35, 0x4e, 0xf5, 0xf6, 0x44, 0xf7, 0x52, 0x0f, 0x42, 0xa0, 0x26, 0x51, 0xd9, 0x89, 0xf9, 0x68, 0xf2, 0xef, 0xeb, 0xba, 0xe1, 0xf4, 0x55, 0x01, 0x57, 0x77, 0xb7, 0x68, 0x55, 0x92, 0xef, + }, + Size: 2464, + Executable: false, + }, + { + Name: []byte("ifconfig.8.gz"), + Digest: []byte{ + 0x18, 0x65, 0x25, 0x11, 0x32, 0xee, 0x77, 0x91, 0x35, 0x4c, 0x3c, 0x24, 0xdb, 0xaf, 0x66, 0xdb, 0xfc, 0x17, 0x7b, 0xba, 0xe1, 0x3d, 0x05, 0xd2, 0xca, 0x6e, 0x2c, 0xe4, 0xef, 0xb8, 0xa8, 0xbe, + }, + Size: 3382, + Executable: false, + }, + { + Name: []byte("nameif.8.gz"), + Digest: []byte{ + 0x73, 0xc1, 0x27, 0xe8, 0x3b, 0xa8, 0x49, 0xdc, 0x0e, 0xdf, 0x70, 0x5f, 0xaf, 0x06, 0x01, 0x2c, 0x62, 0xe9, 0x18, 0x67, 0x01, 0x94, 0x64, 0x26, 0xca, 0x95, 0x22, 0xc0, 0xdc, 0xe4, 0x42, 0xb6, + }, + Size: 523, + Executable: false, + }, + { + Name: []byte("netstat.8.gz"), + Digest: []byte{ + 0xc0, 0x86, 0x43, 0x4a, 0x43, 0x57, 0xaa, 0x84, 0xa7, 0x24, 0xa0, 0x7c, 0x65, 0x38, 0x46, 0x1c, 0xf2, 0x45, 0xa2, 0xef, 0x12, 0x44, 0x18, 0xba, 0x52, 0x56, 0xe9, 0x8e, 0x6a, 0x0f, 0x70, 0x63, + }, + Size: 4284, + Executable: false, + }, + { + Name: []byte("plipconfig.8.gz"), + Digest: []byte{ + 0x2a, 0xd9, 0x1d, 0xa8, 0x9e, 0x0d, 0x05, 0xd0, 0xb0, 0x49, 0xaa, 0x64, 0xba, 0x29, 0x28, 0xc6, 0x45, 0xe1, 0xbb, 0x5e, 0x72, 0x8d, 0x48, 0x7b, 0x09, 0x4f, 0x0a, 0x82, 0x1e, 0x26, 0x83, 0xab, + }, + Size: 889, + Executable: false, + }, + { + Name: []byte("rarp.8.gz"), + Digest: []byte{ + 0x3d, 0x51, 0xc1, 0xd0, 0x6a, 0x59, 0x1e, 0x6d, 0x9a, 0xf5, 0x06, 0xd2, 0xe7, 0x7d, 0x7d, 0xd0, 0x70, 0x3d, 0x84, 0x64, 0xc3, 0x7d, 0xfb, 0x10, 0x84, 0x3b, 0xe1, 0xa9, 0xdf, 0x46, 0xee, 0x9f, + }, + Size: 1198, + Executable: false, + }, + { + Name: []byte("route.8.gz"), + Digest: []byte{ + 0x2a, 0x5a, 0x4b, 0x4f, 0x91, 0xf2, 0x78, 0xe4, 0xa9, 0x25, 0xb2, 0x7f, 0xa7, 0x2a, 0xc0, 0x8a, 0x4a, 0x65, 0xc9, 0x5f, 0x07, 0xa0, 0x48, 0x44, 0xeb, 0x46, 0xf9, 0xc9, 0xe1, 0x17, 0x96, 0x21, + }, + Size: 3525, + Executable: false, + }, + { + Name: []byte("slattach.8.gz"), + Digest: []byte{ + 0x3f, 0x05, 0x6b, 0x20, 0xe1, 0xe4, 0xf0, 0xba, 0x16, 0x15, 0x66, 0x6b, 0x57, 0x96, 0xe9, 0x9d, 0x83, 0xa8, 0x20, 0xaf, 0x8a, 0xca, 0x16, 0x4d, 0xa2, 0x6d, 0x94, 0x8e, 0xca, 0x91, 0x8f, 0xd4, + }, + Size: 1441, + Executable: false, + }, + }, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + // /share/man holds /share/man/man{1,5,8}. + expectedDirectories["/share/man"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{ + { + Name: []byte("man1"), + Digest: mustDirectoryDigest(expectedDirectories["/share/man/man1"]), + Size: expectedDirectories["/share/man/man1"].Size(), + }, + { + Name: []byte("man5"), + Digest: mustDirectoryDigest(expectedDirectories["/share/man/man5"]), + Size: expectedDirectories["/share/man/man5"].Size(), + }, + { + Name: []byte("man8"), + Digest: mustDirectoryDigest(expectedDirectories["/share/man/man8"]), + Size: expectedDirectories["/share/man/man8"].Size(), + }, + }, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + // /share holds /share/man. + expectedDirectories["/share"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{ + { + Name: []byte("man"), + Digest: mustDirectoryDigest(expectedDirectories["/share/man"]), + Size: expectedDirectories["/share/man"].Size(), + }, + }, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{}, + } + + // / holds /bin, /share, and a /sbin symlink. + expectedDirectories["/"] = &castorev1pb.Directory{ + Directories: []*castorev1pb.DirectoryNode{ + { + Name: []byte("bin"), + Digest: mustDirectoryDigest(expectedDirectories["/bin"]), + Size: expectedDirectories["/bin"].Size(), + }, + { + Name: []byte("share"), + Digest: mustDirectoryDigest(expectedDirectories["/share"]), + Size: expectedDirectories["/share"].Size(), + }, + }, + Files: []*castorev1pb.FileNode{}, + Symlinks: []*castorev1pb.SymlinkNode{ + { + Name: []byte("sbin"), + Target: []byte("bin"), + }, + }, + } + // assert we populated the two fixtures properly + require.Equal(t, len(expectedDirectoryPaths), len(expectedDirectories)) + + numDirectoriesReceived := 0 + + rootNode, narSize, narSha256, err := importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + // Don't really bother reading and comparing the contents here, + // We already verify the right digests are produced by comparing the + // directoryCb calls, and TestRegular ensures the reader works. + return mustBlobDigest(blobReader), nil + }, func(directory *castorev1pb.Directory) ([]byte, error) { + // use actualDirectoryOrder to look up the Directory object we expect at this specific invocation. + currentDirectoryPath := expectedDirectoryPaths[numDirectoriesReceived] + + expectedDirectory, found := expectedDirectories[currentDirectoryPath] + require.True(t, found, "must find the current directory") + + requireProtoEq(t, expectedDirectory, directory) + + numDirectoriesReceived += 1 + return mustDirectoryDigest(directory), nil + }, + ) + require.NoError(t, err) + require.Equal(t, &castorev1pb.Node{ + Node: &castorev1pb.Node_Directory{ + Directory: &castorev1pb.DirectoryNode{ + Name: []byte(""), + Digest: mustDirectoryDigest(expectedDirectories["/"]), + Size: expectedDirectories["/"].Size(), + }, + }, + }, rootNode) + require.Equal(t, []byte{ + 0xc6, 0xe1, 0x55, 0xb3, 0x45, 0x6e, 0x30, 0xb7, 0x61, 0x22, 0x63, 0xec, 0x09, 0x50, 0x70, 0x81, 0x1c, 0xaf, 0x8a, 0xbf, 0xd5, 0x9f, 0xaa, 0x72, 0xab, 0x82, 0xa5, 0x92, 0xef, 0xde, 0xb2, 0x53, + }, narSha256) + require.Equal(t, uint64(464152), narSize) +} + +// TestCallbackErrors ensures that errors returned from the callback function +// bubble up to the importer process, and are not ignored. +func TestCallbackErrors(t *testing.T) { + t.Run("callback blob", func(t *testing.T) { + // Pick an example NAR with a regular file. + f, err := os.Open("../../testdata/onebyteregular.nar") + require.NoError(t, err) + + targetErr := errors.New("expected error") + + _, _, _, err = importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + return nil, targetErr + }, func(directory *castorev1pb.Directory) ([]byte, error) { + panic("no directories expected!") + }, + ) + require.ErrorIs(t, err, targetErr) + }) + t.Run("callback directory", func(t *testing.T) { + // Pick an example NAR with a directory node + f, err := os.Open("../../testdata/emptydirectory.nar") + require.NoError(t, err) + + targetErr := errors.New("expected error") + + _, _, _, err = importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { + panic("no file contents expected!") + }, func(directory *castorev1pb.Directory) ([]byte, error) { + return nil, targetErr + }, + ) + require.ErrorIs(t, err, targetErr) + }) +} + +// TestPopDirectories is a regression test that ensures we handle the directory +// stack properly. +// +// This test case looks like: +// +// / (dir) +// /test (dir) +// /test/tested (file) +// /tested (file) +// +// We used to have a bug where the second `tested` file would appear as if +// it was in the `/test` dir because it has that dir as a string prefix. +func TestPopDirectories(t *testing.T) { + f, err := os.Open("../../testdata/popdirectories.nar") + require.NoError(t, err) + defer f.Close() + + _, _, _, err = importer.Import( + context.Background(), + f, + func(blobReader io.Reader) ([]byte, error) { return mustBlobDigest(blobReader), nil }, + func(directory *castorev1pb.Directory) ([]byte, error) { + require.NoError(t, directory.Validate(), "directory validation shouldn't error") + return mustDirectoryDigest(directory), nil + }, + ) + require.NoError(t, err) +} diff --git a/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go b/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go new file mode 100644 index 000000000000..c50d332d85dc --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/roundtrip_test.go @@ -0,0 +1,85 @@ +package importer_test + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + "io" + "os" + "sync" + "testing" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + "code.tvl.fyi/tvix/nar-bridge-go/pkg/importer" + storev1pb "code.tvl.fyi/tvix/store-go" + "github.com/stretchr/testify/require" +) + +func TestRoundtrip(t *testing.T) { + // We pipe nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar to + // storev1pb.Export, and store all the file contents and directory objects + // received in two hashmaps. + // We then feed it to the writer, and test we come up with the same NAR file. + + f, err := os.Open("../../testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar") + require.NoError(t, err) + + narContents, err := io.ReadAll(f) + require.NoError(t, err) + + var mu sync.Mutex + blobsMap := make(map[string][]byte, 0) + directoriesMap := make(map[string]*castorev1pb.Directory) + + rootNode, _, _, err := importer.Import( + context.Background(), + bytes.NewBuffer(narContents), + func(blobReader io.Reader) ([]byte, error) { + // read in contents, we need to put it into filesMap later. + contents, err := io.ReadAll(blobReader) + require.NoError(t, err) + + dgst := mustBlobDigest(bytes.NewReader(contents)) + + // put it in filesMap + mu.Lock() + blobsMap[base64.StdEncoding.EncodeToString(dgst)] = contents + mu.Unlock() + + return dgst, nil + }, + func(directory *castorev1pb.Directory) ([]byte, error) { + dgst := mustDirectoryDigest(directory) + + directoriesMap[base64.StdEncoding.EncodeToString(dgst)] = directory + return dgst, nil + }, + ) + + require.NoError(t, err) + + // done populating everything, now actually test the export :-) + var narBuf bytes.Buffer + err = storev1pb.Export( + &narBuf, + rootNode, + func(directoryDgst []byte) (*castorev1pb.Directory, error) { + d, found := directoriesMap[base64.StdEncoding.EncodeToString(directoryDgst)] + if !found { + panic(fmt.Sprintf("directory %v not found", base64.StdEncoding.EncodeToString(directoryDgst))) + } + return d, nil + }, + func(blobDgst []byte) (io.ReadCloser, error) { + blobContents, found := blobsMap[base64.StdEncoding.EncodeToString(blobDgst)] + if !found { + panic(fmt.Sprintf("blob %v not found", base64.StdEncoding.EncodeToString(blobDgst))) + } + return io.NopCloser(bytes.NewReader(blobContents)), nil + }, + ) + + require.NoError(t, err, "exporter shouldn't fail") + require.Equal(t, narContents, narBuf.Bytes()) +} diff --git a/tvix/nar-bridge-go/pkg/importer/util_test.go b/tvix/nar-bridge-go/pkg/importer/util_test.go new file mode 100644 index 000000000000..06353cf582e5 --- /dev/null +++ b/tvix/nar-bridge-go/pkg/importer/util_test.go @@ -0,0 +1,34 @@ +package importer_test + +import ( + "io" + "testing" + + castorev1pb "code.tvl.fyi/tvix/castore-go" + "github.com/google/go-cmp/cmp" + "google.golang.org/protobuf/testing/protocmp" + "lukechampine.com/blake3" +) + +func requireProtoEq(t *testing.T, expected interface{}, actual interface{}) { + if diff := cmp.Diff(expected, actual, protocmp.Transform()); diff != "" { + t.Errorf("unexpected difference:\n%v", diff) + } +} + +func mustDirectoryDigest(d *castorev1pb.Directory) []byte { + dgst, err := d.Digest() + if err != nil { + panic(err) + } + return dgst +} + +func mustBlobDigest(r io.Reader) []byte { + hasher := blake3.New(32, nil) + _, err := io.Copy(hasher, r) + if err != nil { + panic(err) + } + return hasher.Sum([]byte{}) +} diff --git a/tvix/nar-bridge-go/testdata/emptydirectory.nar b/tvix/nar-bridge-go/testdata/emptydirectory.nar new file mode 100644 index 000000000000..baba55862255 --- /dev/null +++ b/tvix/nar-bridge-go/testdata/emptydirectory.nar Binary files differdiff --git a/tvix/nar-bridge-go/testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar b/tvix/nar-bridge-go/testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar new file mode 100644 index 000000000000..6cb0b16e5d5d --- /dev/null +++ b/tvix/nar-bridge-go/testdata/nar_1094wph9z4nwlgvsd53abfz8i117ykiv5dwnq9nnhz846s7xqd7d.nar Binary files differdiff --git a/tvix/nar-bridge-go/testdata/onebyteexecutable.nar b/tvix/nar-bridge-go/testdata/onebyteexecutable.nar new file mode 100644 index 000000000000..68682196665c --- /dev/null +++ b/tvix/nar-bridge-go/testdata/onebyteexecutable.nar Binary files differdiff --git a/tvix/nar-bridge-go/testdata/onebyteregular.nar b/tvix/nar-bridge-go/testdata/onebyteregular.nar new file mode 100644 index 000000000000..b8c94932bf0c --- /dev/null +++ b/tvix/nar-bridge-go/testdata/onebyteregular.nar Binary files differdiff --git a/tvix/nar-bridge-go/testdata/popdirectories.nar b/tvix/nar-bridge-go/testdata/popdirectories.nar new file mode 100644 index 000000000000..74313aca529f --- /dev/null +++ b/tvix/nar-bridge-go/testdata/popdirectories.nar Binary files differdiff --git a/tvix/nar-bridge-go/testdata/symlink.nar b/tvix/nar-bridge-go/testdata/symlink.nar new file mode 100644 index 000000000000..7990e4ad5bc2 --- /dev/null +++ b/tvix/nar-bridge-go/testdata/symlink.nar Binary files differ |