From 19afe5ffbb5ddad2b8540f70dea64f6b14bd2ad6 Mon Sep 17 00:00:00 2001
From: James Humphries
Date: Wed, 7 May 2025 16:20:11 +0100
Subject: [PATCH] Consume TRACEPARENT and TRACESTATE to construct the OTel trace context (#2763)

Signed-off-by: James Humphries
---
 cmd/tofu/main.go          |  3 +--
 internal/tracing/init.go  | 48 ++++++++++++++++++++++++++++++++++-----
 internal/tracing/utils.go |  4 ++--
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/cmd/tofu/main.go b/cmd/tofu/main.go
index a8002e648a..37d534b9bc 100644
--- a/cmd/tofu/main.go
+++ b/cmd/tofu/main.go
@@ -70,7 +70,7 @@ func main() {
 func realMain() int {
 	defer logging.PanicHandler()
 
-	err := tracing.OpenTelemetryInit()
+	ctx, err := tracing.OpenTelemetryInit(context.Background())
 	if err != nil {
 		// openTelemetryInit can only fail if OpenTofu was run with an
 		// explicit environment variable to enable telemetry collection,
@@ -81,7 +81,6 @@ func realMain() int {
 		return 1
 	}
 	defer tracing.ForceFlush(5 * time.Second)
-	ctx := context.Background()
 
 	// At minimum, we emit a span covering the entire command execution.
 	ctx, span := tracing.Tracer().Start(ctx, "tofu")
diff --git a/internal/tracing/init.go b/internal/tracing/init.go
index fb140f7477..7d03ffa333 100644
--- a/internal/tracing/init.go
+++ b/internal/tracing/init.go
@@ -39,6 +39,18 @@ better based on experience with this experiment.
 // then we'll enable an experimental OTLP trace exporter.
 const OTELExporterEnvVar = "OTEL_TRACES_EXPORTER"
 
+// traceParentEnvVar is the env var that should be used to instruct opentofu which
+// trace parent to use.
+// If this environment variable is set when running OpenTofu CLI
+// then we'll extract the traceparent from the environment and add it to the context.
+// This ensures that all opentofu traces are linked to the trace that invoked
+// this command.
+const traceParentEnvVar = "TRACEPARENT"
+
+// traceStateEnvVar is the env var that should be used to instruct opentofu which
+// trace state to use.
+const traceStateEnvVar = "TRACESTATE"
+
 // isTracingEnabled is true if OpenTelemetry is enabled.
 var isTracingEnabled bool
 
@@ -60,14 +72,19 @@ var isTracingEnabled bool
 // means another relatively-heavy external dependency. OTLP happens to use
 // protocol buffers and gRPC, which OpenTofu would depend on for other reasons
 // anyway.
-func OpenTelemetryInit() error {
+//
+// Returns the context with trace context extracted from environment variables
+// if TRACEPARENT is set.
+func OpenTelemetryInit(ctx context.Context) (context.Context, error) {
 	isTracingEnabled = false
+
 	// We'll check the environment variable ourselves first, because the
 	// "autoexport" helper we're about to use is built under the assumption
 	// that exporting should always be enabled and so will expect to find
 	// an OTLP server on localhost if no environment variables are set at all.
 	if os.Getenv(OTELExporterEnvVar) != "otlp" {
-		return nil // By default, we just discard all telemetry calls
+		log.Printf("[TRACE] OpenTelemetry: %s not set, OTel tracing is not enabled", OTELExporterEnvVar)
+		return ctx, nil // By default, we just discard all telemetry calls
 	}
 	isTracingEnabled = true
 
@@ -95,12 +112,30 @@ func OpenTelemetryInit() error {
 		),
 	)
 	if err != nil {
-		return fmt.Errorf("failed to create resource: %w", err)
+		return ctx, fmt.Errorf("failed to create resource: %w", err)
 	}
 
-	exporter, err := autoexport.NewSpanExporter(context.Background())
+	// Check if the trace parent/state environment variable is set and extract it into our context
+	if traceparent := os.Getenv(traceParentEnvVar); traceparent != "" {
+		log.Printf("[TRACE] OpenTelemetry: found trace parent in environment: %s", traceparent)
+		// Create a carrier that contains the traceparent from environment variables
+		// The key is lowercase because the TraceContext propagator expects lowercase keys
+		propCarrier := make(propagation.MapCarrier)
+		propCarrier.Set("traceparent", traceparent)
+
+		if tracestate := os.Getenv(traceStateEnvVar); tracestate != "" {
+			log.Printf("[TRACE] OpenTelemetry: found trace state in environment: %s", tracestate)
+			propCarrier.Set("tracestate", tracestate)
+		}
+
+		// Extract the trace context into the context
+		tc := propagation.TraceContext{}
+		ctx = tc.Extract(ctx, propCarrier)
+	}
+
+	exporter, err := autoexport.NewSpanExporter(ctx)
 	if err != nil {
-		return err
+		return ctx, err
 	}
 
 	// Set the global tracer provider, this allows us to use this global TracerProvider
@@ -114,6 +149,7 @@ func OpenTelemetryInit() error {
 	)
 	otel.SetTracerProvider(provider)
 
+	// Create a composite propagator that includes both TraceContext and Baggage
 	prop := propagation.NewCompositeTextMapPropagator(propagation.TraceContext{}, propagation.Baggage{})
 	otel.SetTextMapPropagator(prop)
 
@@ -124,5 +160,5 @@ func OpenTelemetryInit() error {
 		panic(fmt.Sprintf("OpenTelemetry error: %v", err))
 	}))
 
-	return nil
+	return ctx, nil
 }
diff --git a/internal/tracing/utils.go b/internal/tracing/utils.go
index 5da2528374..127b41bb98 100644
--- a/internal/tracing/utils.go
+++ b/internal/tracing/utils.go
@@ -81,14 +81,14 @@ func ForceFlush(timeout time.Duration) {
 
 	provider, ok := otel.GetTracerProvider().(*sdktrace.TracerProvider)
 	if !ok {
-		log.Printf("[DEBUG] OpenTelemetry: tracer provider is not an SDK provider, can't force flush")
+		log.Printf("[TRACE] OpenTelemetry: tracer provider is not an SDK provider, can't force flush")
 		return
 	}
 
 	ctx, cancel := context.WithTimeout(context.Background(), timeout)
 	defer cancel()
 
-	log.Printf("[DEBUG] OpenTelemetry: flushing spans")
+	log.Printf("[TRACE] OpenTelemetry: flushing spans")
 	if err := provider.ForceFlush(ctx); err != nil {
 		log.Printf("[WARN] OpenTelemetry: error flushing spans: %v", err)
 	}