about summary refs log tree commit diff
path: root/tvix/nar-bridge/cmd
diff options
context:
space:
mode:
authorConnor Brewster <cbrewster@hey.com>2024-01-21T20·52-0600
committerclbot <clbot@tvl.fyi>2024-01-23T15·50+0000
commitd0563294121a85ecddbcc44474373b9293c31e7f (patch)
tree021246b2b903485df2447b80b767cca7600d6757 /tvix/nar-bridge/cmd
parente8061fc6190fdeaadb9d8ebc8d8dba15c9c6f5c4 (diff)
feat(tvix/store/nar-bridge): Setup OpenTelemetry r/7442
Sets up OpenTelemetry integration for nar-bridge. Right now it will
export spans for HTTP server requests and all gRPC client requests.
Having the spans available will make performance work significantly
easier as it provides a high level overview of where time is being
spent.

In the future we can add application-specific metrics and
integrate logrus.

Change-Id: Ie3860675d7ffc626a95673ba062c3c798d8bb2a7
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10678
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: Connor Brewster <cbrewster@hey.com>
Diffstat (limited to 'tvix/nar-bridge/cmd')
-rw-r--r--tvix/nar-bridge/cmd/nar-bridge-http/main.go26
-rw-r--r--tvix/nar-bridge/cmd/nar-bridge-http/otel.go87
2 files changed, 109 insertions, 4 deletions
diff --git a/tvix/nar-bridge/cmd/nar-bridge-http/main.go b/tvix/nar-bridge/cmd/nar-bridge-http/main.go
index 2eed4f4f4c..171ea7f5bd 100644
--- a/tvix/nar-bridge/cmd/nar-bridge-http/main.go
+++ b/tvix/nar-bridge/cmd/nar-bridge-http/main.go
@@ -4,17 +4,18 @@ import (
 	"context"
 	"os"
 	"os/signal"
+	"runtime/debug"
 	"time"
 
 	"github.com/alecthomas/kong"
 
+	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
 
 	castorev1pb "code.tvl.fyi/tvix/castore-go"
 	narBridgeHttp "code.tvl.fyi/tvix/nar-bridge/pkg/http"
 	storev1pb "code.tvl.fyi/tvix/store-go"
-	"github.com/sirupsen/logrus"
 	log "github.com/sirupsen/logrus"
 )
 
@@ -24,24 +25,41 @@ var cli struct {
 	ListenAddr      string `name:"listen-addr" help:"The address this service listens on" type:"string" default:"[::]:9000"`                    //nolint:lll
 	EnableAccessLog bool   `name:"access-log" help:"Enable access logging" type:"bool" default:"true" negatable:""`                             //nolint:lll
 	StoreAddr       string `name:"store-addr" help:"The address to the tvix-store RPC interface this will connect to" default:"localhost:8000"` //nolint:lll
+	EnableOtlp      bool   `name:"otlp" help:"Enable OpenTelemetry for logs, spans, and metrics" default:"true"`                                //nolint:lll
 }
 
 func main() {
 	_ = kong.Parse(&cli)
 
-	logLevel, err := logrus.ParseLevel(cli.LogLevel)
+	logLevel, err := log.ParseLevel(cli.LogLevel)
 	if err != nil {
 		log.Panic("invalid log level")
 		return
 	}
-	logrus.SetLevel(logLevel)
+	log.SetLevel(logLevel)
 
 	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
 	defer stop()
 
+	if cli.EnableOtlp {
+		buildInfo, ok := debug.ReadBuildInfo()
+		if !ok {
+			log.Fatal("failed to read build info")
+		}
+
+		shutdown, err := setupOpenTelemetry(ctx, "nar-bridge", buildInfo.Main.Version)
+		if err != nil {
+			log.WithError(err).Fatal("failed to setup OpenTelemetry")
+		}
+		defer shutdown(context.Background())
+	}
+
 	// connect to tvix-store
 	log.Debugf("Dialing to %v", cli.StoreAddr)
-	conn, err := grpc.DialContext(ctx, cli.StoreAddr, grpc.WithTransportCredentials(insecure.NewCredentials()))
+	conn, err := grpc.DialContext(ctx, cli.StoreAddr,
+		grpc.WithTransportCredentials(insecure.NewCredentials()),
+		grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
+	)
 	if err != nil {
 		log.Fatalf("did not connect: %v", err)
 	}
diff --git a/tvix/nar-bridge/cmd/nar-bridge-http/otel.go b/tvix/nar-bridge/cmd/nar-bridge-http/otel.go
new file mode 100644
index 0000000000..c446c6ec1a
--- /dev/null
+++ b/tvix/nar-bridge/cmd/nar-bridge-http/otel.go
@@ -0,0 +1,87 @@
+package main
+
+import (
+	"context"
+	"errors"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	"go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.24.0"
+)
+
+func setupOpenTelemetry(ctx context.Context, serviceName, serviceVersion string) (func(context.Context) error, error) {
+	var shutdownFuncs []func(context.Context) error
+	shutdown := func(ctx context.Context) error {
+		var err error
+		for _, fn := range shutdownFuncs {
+			err = errors.Join(err, fn(ctx))
+		}
+		shutdownFuncs = nil
+		return err
+	}
+
+	res, err := resource.Merge(
+		resource.Default(),
+		resource.NewWithAttributes(
+			semconv.SchemaURL,
+			semconv.ServiceName(serviceName),
+			semconv.ServiceVersion(serviceVersion),
+		),
+	)
+	if err != nil {
+		return nil, errors.Join(err, shutdown(ctx))
+	}
+
+	prop := propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	)
+	otel.SetTextMapPropagator(prop)
+
+	tracerProvider, err := newTraceProvider(ctx, res)
+	if err != nil {
+		return nil, errors.Join(err, shutdown(ctx))
+	}
+	shutdownFuncs = append(shutdownFuncs, tracerProvider.Shutdown)
+	otel.SetTracerProvider(tracerProvider)
+
+	meterProvider, err := newMeterProvider(ctx, res)
+	if err != nil {
+		return nil, errors.Join(err, shutdown(ctx))
+	}
+	shutdownFuncs = append(shutdownFuncs, meterProvider.Shutdown)
+	otel.SetMeterProvider(meterProvider)
+
+	return shutdown, nil
+}
+
+func newTraceProvider(ctx context.Context, res *resource.Resource) (*trace.TracerProvider, error) {
+	traceExporter, err := otlptracegrpc.New(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	traceProvider := trace.NewTracerProvider(
+		trace.WithBatcher(traceExporter),
+		trace.WithResource(res),
+	)
+	return traceProvider, nil
+}
+
+func newMeterProvider(ctx context.Context, res *resource.Resource) (*metric.MeterProvider, error) {
+	metricExporter, err := otlpmetricgrpc.New(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	meterProvider := metric.NewMeterProvider(
+		metric.WithResource(res),
+		metric.WithReader(metric.NewPeriodicReader(metricExporter)),
+	)
+	return meterProvider, nil
+}