about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--tvix/docs/src/TODO.md10
-rw-r--r--tvix/tracing/src/lib.rs17
2 files changed, 15 insertions, 12 deletions
diff --git a/tvix/docs/src/TODO.md b/tvix/docs/src/TODO.md
index f07bfa122a6d..195f2c6945c8 100644
--- a/tvix/docs/src/TODO.md
+++ b/tvix/docs/src/TODO.md
@@ -216,16 +216,6 @@ logs etc, but this is something requiring a lot of designing.
  - Maybe drop `--log-level` entirely, and only use `RUST_LOG` env exclusively?
    `debug`,`trace` level across all crates is a bit useless, and `RUST_LOG` can
    be much more granular…
- - The OTLP stack is quite spammy if there's no OTLP collector running on
-   localhost.
-   https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/
-   mentions a `OTEL_SDK_DISABLED` env var, but it defaults to false, so they
-   suggest enabling OTLP by default.
-   We currently have a `--otlp` cmdline arg which explicitly needs to be set to
-   false to stop it, in line with that "enabled by default" philosophy
-   Do some research if we can be less spammy. While OTLP support is
-   feature-flagged, it should not get in the way too much, so we can actually
-   have it compiled in most of the time.
  - gRPC trace propagation (cl/10532 + @simon)
    We need to wire trace propagation into our gRPC clients, so if we collect
    traces both for the client and server they will be connected.
diff --git a/tvix/tracing/src/lib.rs b/tvix/tracing/src/lib.rs
index 9cfe5afa52d2..c8c1385c3fba 100644
--- a/tvix/tracing/src/lib.rs
+++ b/tvix/tracing/src/lib.rs
@@ -10,7 +10,7 @@ use opentelemetry::KeyValue;
 #[cfg(feature = "otlp")]
 use opentelemetry_sdk::{
     resource::{ResourceDetector, SdkProvidedResourceDetector},
-    trace::BatchConfig,
+    trace::BatchConfigBuilder,
     Resource,
 };
 
@@ -173,7 +173,20 @@ impl TracingBuilder {
                 let tracer = opentelemetry_otlp::new_pipeline()
                     .tracing()
                     .with_exporter(opentelemetry_otlp::new_exporter().tonic())
-                    .with_batch_config(BatchConfig::default())
+                    .with_batch_config(
+                        BatchConfigBuilder::default()
+                            // the default value for `max_export_batch_size` is 512, which we will fill
+                            // pretty quickly, and which will then result in an export. We want to make sure
+                            // that the export is only done once the schedule is met and not as soon as 512
+                            // spans are collected.
+                            .with_max_export_batch_size(4096)
+                            // analogous to the default config: `max_export_batch_size * 4`
+                            .with_max_queue_size(4096 * 4)
+                            // only force an export to the OTLP collector every 10 seconds to reduce the number
+                            // of error messages if an OTLP collector is not available
+                            .with_scheduled_delay(std::time::Duration::from_secs(10))
+                            .build(),
+                    )
                     .with_trace_config(opentelemetry_sdk::trace::config().with_resource({
                         // use SdkProvidedResourceDetector.detect to detect resources,
                         // but replace the default service name with our default.