From d0563294121a85ecddbcc44474373b9293c31e7f Mon Sep 17 00:00:00 2001 From: Connor Brewster Date: Sun, 21 Jan 2024 14:52:46 -0600 Subject: feat(tvix/store/nar-bridge): Setup OpenTelemetry Sets up OpenTelemetry integration for nar-bridge. Right now it will export spans for HTTP server requests and all gRPC client requests. Having the spans available will make performance work significantly easier as it provides a high-level overview of where time is being spent. In the future we can add application-specific metrics and integrate logrus. Change-Id: Ie3860675d7ffc626a95673ba062c3c798d8bb2a7 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10678 Reviewed-by: flokli Tested-by: BuildkiteCI Autosubmit: Connor Brewster --- tvix/nar-bridge/cmd/nar-bridge-http/main.go | 26 +++++++-- tvix/nar-bridge/cmd/nar-bridge-http/otel.go | 87 +++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+), 4 deletions(-) create mode 100644 tvix/nar-bridge/cmd/nar-bridge-http/otel.go (limited to 'tvix/nar-bridge/cmd') diff --git a/tvix/nar-bridge/cmd/nar-bridge-http/main.go b/tvix/nar-bridge/cmd/nar-bridge-http/main.go index 2eed4f4f4c4b..171ea7f5bdd0 100644 --- a/tvix/nar-bridge/cmd/nar-bridge-http/main.go +++ b/tvix/nar-bridge/cmd/nar-bridge-http/main.go @@ -4,17 +4,18 @@ import ( "context" "os" "os/signal" + "runtime/debug" "time" "github.com/alecthomas/kong" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" castorev1pb "code.tvl.fyi/tvix/castore-go" narBridgeHttp "code.tvl.fyi/tvix/nar-bridge/pkg/http" storev1pb "code.tvl.fyi/tvix/store-go" - "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" ) @@ -24,24 +25,41 @@ var cli struct { ListenAddr string `name:"listen-addr" help:"The address this service listens on" type:"string" default:"[::]:9000"` //nolint:lll EnableAccessLog bool `name:"access-log" help:"Enable access logging" type:"bool" default:"true" negatable:""` //nolint:lll 
StoreAddr string `name:"store-addr" help:"The address to the tvix-store RPC interface this will connect to" default:"localhost:8000"` //nolint:lll + EnableOtlp bool `name:"otlp" help:"Enable OpenTelemetry for logs, spans, and metrics" default:"true"` //nolint:lll } func main() { _ = kong.Parse(&cli) - logLevel, err := logrus.ParseLevel(cli.LogLevel) + logLevel, err := log.ParseLevel(cli.LogLevel) if err != nil { log.Panic("invalid log level") return } - logrus.SetLevel(logLevel) + log.SetLevel(logLevel) ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt) defer stop() + if cli.EnableOtlp { + buildInfo, ok := debug.ReadBuildInfo() + if !ok { + log.Fatal("failed to read build info") + } + + shutdown, err := setupOpenTelemetry(ctx, "nar-bridge", buildInfo.Main.Version) + if err != nil { + log.WithError(err).Fatal("failed to setup OpenTelemetry") + } + defer shutdown(context.Background()) + } + // connect to tvix-store log.Debugf("Dialing to %v", cli.StoreAddr) - conn, err := grpc.DialContext(ctx, cli.StoreAddr, grpc.WithTransportCredentials(insecure.NewCredentials())) + conn, err := grpc.DialContext(ctx, cli.StoreAddr, + grpc.WithTransportCredentials(insecure.NewCredentials()), + grpc.WithStatsHandler(otelgrpc.NewClientHandler()), + ) if err != nil { log.Fatalf("did not connect: %v", err) } diff --git a/tvix/nar-bridge/cmd/nar-bridge-http/otel.go b/tvix/nar-bridge/cmd/nar-bridge-http/otel.go new file mode 100644 index 000000000000..c446c6ec1a14 --- /dev/null +++ b/tvix/nar-bridge/cmd/nar-bridge-http/otel.go @@ -0,0 +1,87 @@ +package main + +import ( + "context" + "errors" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.24.0" +) 
+
+// setupOpenTelemetry installs the process-wide OpenTelemetry propagator,
+// tracer provider and meter provider.
+//
+// The resource attached to both providers is resource.Default() merged with
+// the given service name and version. The returned function calls Shutdown on
+// every provider that was registered, in registration order, and joins their
+// errors; it clears its list afterwards, so a second call has nothing left to
+// run and returns nil.
+//
+// If any step fails, the providers registered so far are shut down using ctx
+// and that result is joined with the causing error; the returned function is
+// nil in that case.
+func setupOpenTelemetry(ctx context.Context, serviceName, serviceVersion string) (func(context.Context) error, error) {
+	// Shutdown hooks of the providers created so far, in creation order.
+	var cleanups []func(context.Context) error
+
+	shutdown := func(ctx context.Context) error {
+		var joined error
+		for i := range cleanups {
+			joined = errors.Join(joined, cleanups[i](ctx))
+		}
+		// Drop the hooks so they are never invoked twice.
+		cleanups = nil
+		return joined
+	}
+
+	// fail aborts setup: it tears down whatever already exists and reports
+	// both the original cause and any teardown error.
+	fail := func(cause error) (func(context.Context) error, error) {
+		return nil, errors.Join(cause, shutdown(ctx))
+	}
+
+	base := resource.Default()
+	service := resource.NewWithAttributes(
+		semconv.SchemaURL,
+		semconv.ServiceName(serviceName),
+		semconv.ServiceVersion(serviceVersion),
+	)
+	merged, err := resource.Merge(base, service)
+	if err != nil {
+		return fail(err)
+	}
+
+	// Propagate both trace context and baggage.
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+
+	tp, err := newTraceProvider(ctx, merged)
+	if err != nil {
+		return fail(err)
+	}
+	cleanups = append(cleanups, tp.Shutdown)
+	otel.SetTracerProvider(tp)
+
+	mp, err := newMeterProvider(ctx, merged)
+	if err != nil {
+		return fail(err)
+	}
+	cleanups = append(cleanups, mp.Shutdown)
+	otel.SetMeterProvider(mp)
+
+	return shutdown, nil
+}
+
+// newTraceProvider creates an OTLP/gRPC trace exporter (no explicit exporter
+// options are passed) and returns a tracer provider that batches spans to it
+// and tags them with res. The exporter's construction error is returned as-is.
+func newTraceProvider(ctx context.Context, res *resource.Resource) (*trace.TracerProvider, error) {
+	exporter, err := otlptracegrpc.New(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	return trace.NewTracerProvider(
+		trace.WithBatcher(exporter),
+		trace.WithResource(res),
+	), nil
+}
+
+// newMeterProvider creates an OTLP/gRPC metric exporter (no explicit exporter
+// options are passed) and returns a meter provider that reads through a
+// periodic reader on that exporter and tags metrics with res. The exporter's
+// construction error is returned as-is.
+func newMeterProvider(ctx context.Context, res *resource.Resource) (*metric.MeterProvider, error) {
+	exporter, err := otlpmetricgrpc.New(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	reader := metric.NewPeriodicReader(exporter)
+	return metric.NewMeterProvider(
+		metric.WithResource(res),
+		metric.WithReader(reader),
+	), nil
+}
--
cgit 1.4.1