From f8bc43d149c1a0826b0c0855c973f53d57b97199 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Thu, 28 May 2026 14:53:12 +0200 Subject: [PATCH 1/3] Add fork safety native methods to `TraceExporter` Expose `_native_before_fork`, `_native_after_fork_in_parent`, and `_native_after_fork_in_child` instance methods that delegate to libdatadog's SharedRuntime fork hooks. These coordinate the tokio runtime lifecycle around process forks (Puma, Unicorn, Passenger). --- ext/libdatadog_api/trace_exporter.c | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/ext/libdatadog_api/trace_exporter.c b/ext/libdatadog_api/trace_exporter.c index a0ab29284d..a8a53f9e0f 100644 --- a/ext/libdatadog_api/trace_exporter.c +++ b/ext/libdatadog_api/trace_exporter.c @@ -20,6 +20,9 @@ static VALUE _native_from_span(VALUE klass, VALUE span); /* TraceExporter methods */ static VALUE _native_exporter_new(int argc, VALUE *argv, VALUE klass); static VALUE _native_send_traces(VALUE self, VALUE traces); +static VALUE _native_before_fork(VALUE self); +static VALUE _native_after_fork_in_parent(VALUE self); +static VALUE _native_after_fork_in_child(VALUE self); /* Response helpers */ static VALUE create_ok_response(long trace_count, VALUE payload); @@ -473,6 +476,46 @@ static VALUE _native_exporter_new( exporter); } +/* ======================================================================== + * Fork safety hooks + * + * These coordinate the tokio runtime lifecycle around process forks + * (Puma, Unicorn, Passenger). Each hook reports a NULL exporter pointer through raise_error(rb_eRuntimeError, ...), calls the matching ddog_trace_exporter fork function (before_fork, after_fork_in_parent or after_fork_in_child), passes the returned error pointer to check_exporter_error, and returns Qnil. 
+ * ======================================================================== */ + +static VALUE _native_before_fork(VALUE self) { + ddog_TraceExporter *exporter; + TypedData_Get_Struct(self, ddog_TraceExporter, &trace_exporter_typed_data, exporter); + if (exporter == NULL) { + raise_error(rb_eRuntimeError, "TraceExporter has not been initialized or was already freed"); + } + ddog_TraceExporterError *err = ddog_trace_exporter_before_fork(exporter); + check_exporter_error("Failed to prepare for fork", err); + return Qnil; +} + +static VALUE _native_after_fork_in_parent(VALUE self) { + ddog_TraceExporter *exporter; + TypedData_Get_Struct(self, ddog_TraceExporter, &trace_exporter_typed_data, exporter); + if (exporter == NULL) { + raise_error(rb_eRuntimeError, "TraceExporter has not been initialized or was already freed"); + } + ddog_TraceExporterError *err = ddog_trace_exporter_after_fork_in_parent(exporter); + check_exporter_error("Failed to restore after fork in parent", err); + return Qnil; +} + +static VALUE _native_after_fork_in_child(VALUE self) { + ddog_TraceExporter *exporter; + TypedData_Get_Struct(self, ddog_TraceExporter, &trace_exporter_typed_data, exporter); + if (exporter == NULL) { + raise_error(rb_eRuntimeError, "TraceExporter has not been initialized or was already freed"); + } + ddog_TraceExporterError *err = ddog_trace_exporter_after_fork_in_child(exporter); + check_exporter_error("Failed to restore after fork in child", err); + return Qnil; +} + /* ======================================================================== * GVL-release helper for ddog_trace_exporter_send_trace_chunks * @@ -736,6 +779,14 @@ void trace_exporter_init(VALUE tracing_module) { rb_define_method(trace_exporter_class, "_native_send_traces", _native_send_traces, 1); + /* Instance: fork safety hooks (each method takes no arguments) */ + rb_define_method(trace_exporter_class, "_native_before_fork", + _native_before_fork, 0); + rb_define_method(trace_exporter_class, "_native_after_fork_in_parent", 
+ _native_after_fork_in_parent, 0); + rb_define_method(trace_exporter_class, "_native_after_fork_in_child", + _native_after_fork_in_child, 0); + /* ---------------------------------------------------------------- * Response class (defined in Ruby, loaded lazily) * From f170f9efd00f985846887c54a438fc6df5968253 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Thu, 28 May 2026 14:53:52 +0200 Subject: [PATCH 2/3] Replace `RUBY_UBF_IO` with cooperative cancellation token Create a cancellation token per send call and pass it to the custom unblock function. When Ruby interrupts the thread (shutdown, Thread#kill), the UBF cancels the token, which cooperatively aborts the in-flight HTTP request in the Rust runtime. This replaces the signal-based RUBY_UBF_IO which could not actually cancel the Rust HTTP pipeline. --- ext/libdatadog_api/trace_exporter.c | 32 +++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/ext/libdatadog_api/trace_exporter.c b/ext/libdatadog_api/trace_exporter.c index a8a53f9e0f..0fb9656382 100644 --- a/ext/libdatadog_api/trace_exporter.c +++ b/ext/libdatadog_api/trace_exporter.c @@ -528,6 +528,7 @@ typedef struct { const ddog_TraceExporter *exporter; ddog_TracerTraceChunks *chunks; ddog_TraceExporterResponse *response; + ddog_TraceExporterCancelToken *cancel_token; /* borrowed, not owned: build_and_send_traces creates and drops the token */ ddog_TraceExporterErrorCode error_code; bool failed; bool send_ran; @@ -536,7 +537,7 @@ typedef struct { static void *send_chunks_without_gvl(void *data) { send_chunks_args_t *args = (send_chunks_args_t *)data; ddog_TraceExporterError *err = ddog_trace_exporter_send_trace_chunks( - args->exporter, args->chunks, &args->response); + args->exporter, args->chunks, &args->response, args->cancel_token); if (err != NULL) { args->error_code = err->code; args->failed = true; @@ -546,6 +547,20 @@ static void *send_chunks_without_gvl(void *data) { return NULL; } +/* + * Unblock function: cooperatively cancel an in-flight send. 
+ * + * Called by Ruby when an interrupt (Thread#kill, shutdown) fires while + * the thread is inside rb_thread_call_without_gvl2. Cancelling the + * token causes the Rust HTTP pipeline to abort the in-flight request + * and return promptly, which is not possible with RUBY_UBF_IO's + * signal-based approach. The argument is the token pointer that build_and_send_traces registers with rb_thread_call_without_gvl2 as the unblock function data. + */ +static void interrupt_exporter_call(void *cancel_token) { + ddog_trace_exporter_cancel_token_cancel( + (ddog_TraceExporterCancelToken *)cancel_token); +} + /* * Check for a pending Ruby exception without raising it. * Mirrors the profiling extension's check_if_pending_exception(). @@ -638,13 +653,24 @@ static VALUE build_and_send_traces(VALUE arg) { * response before any Ruby exception propagates -- otherwise we * would leak those Rust-allocated objects. * + * A cancellation token is created per send call and passed to the + * custom unblock function (interrupt_exporter_call). When Ruby + * interrupts the thread (shutdown, Thread#kill), the UBF cancels + * the token, which cooperatively aborts the in-flight HTTP request + * in the Rust runtime. This replaces the signal-based RUBY_UBF_IO + * which could not actually cancel the Rust HTTP pipeline. The token is dropped right after the loop, whether or not the send ran. + * * An interrupt (e.g. Thread#kill) may cause gvl2 to return before * our function runs, so we loop until it does. */ + ddog_TraceExporterCancelToken cancel_token = + ddog_trace_exporter_cancel_token_new(); + send_chunks_args_t args = { .exporter = ctx->exporter, .chunks = ctx->chunks, .response = NULL, + .cancel_token = &cancel_token, .failed = false, .send_ran = false, }; @@ -653,12 +679,14 @@ static VALUE build_and_send_traces(VALUE arg) { while (!args.send_ran && !pending_exception) { rb_thread_call_without_gvl2( send_chunks_without_gvl, &args, - RUBY_UBF_IO, NULL); + interrupt_exporter_call, &cancel_token); if (!args.send_ran) { pending_exception = check_if_pending_exception(); } } + + ddog_trace_exporter_cancel_token_drop(&cancel_token); /* Only null chunks when the send actually ran and consumed them. 
* If an interrupt fired before the send executed, chunks are still * live and the ensure handler must free them. */ From 67d68aa4df35ae9057671b12f42507672a8a92a6 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Thu, 28 May 2026 15:43:44 +0200 Subject: [PATCH 3/3] Wire fork safety into native transport via `AtForkMonkeyPatch` Register a `:child` callback that calls `_native_after_fork_in_child` on the exporter to recreate the tokio runtime in forked child processes. Without this, the Rust runtime is dead after fork and subsequent send calls would hang or fail. The `AtForkMonkeyPatch` only supports `:child` stage, so `before_fork` and `after_fork_in_parent` are not called. The child path is the critical one: it creates a fresh runtime regardless of whether the parent was prepared. --- lib/datadog/tracing/transport/native.rb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/datadog/tracing/transport/native.rb b/lib/datadog/tracing/transport/native.rb index 895065dcaf..923b0ff60e 100644 --- a/lib/datadog/tracing/transport/native.rb +++ b/lib/datadog/tracing/transport/native.rb @@ -77,6 +77,15 @@ def initialize(agent_settings:, logger: Datadog.logger) service: service, version: version ) + + # In forked child processes the tokio runtime is dead. + # Recreate it so the exporter can send traces again. + exporter = @exporter + Core::Utils::AtForkMonkeyPatch.at_fork(:child) do + exporter._native_after_fork_in_child + rescue => e + Datadog.logger.warn { "Native transport after-fork reset failed: #{e}" } + end end # Send a list of traces to the agent.