From d979c2b8f4cd446d0ab51cbc416ec6df3b86bc43 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 12:17:29 -0400 Subject: [PATCH 01/16] Add DI.exception_backtrace C extension to avoid customer code dispatch Same pattern as DI.exception_message: reads the internal `bt` ivar directly via rb_ivar_get, bypassing any Ruby-level override of Exception#backtrace. This ensures DI instrumentation never invokes customer code when serializing exception data. - Added exception_backtrace to ext/libdatadog_api/di.c - Updated serialize_throwable to use DI.exception_backtrace - Added RBS signature - Added unit tests for the C extension method - Added integration test for backtrace override bypass Co-Authored-By: Claude --- ext/libdatadog_api/di.c | 24 ++++++++ lib/datadog/di/probe_notification_builder.rb | 2 +- sig/datadog/di.rbs | 1 + .../di/ext/exception_backtrace_spec.rb | 60 +++++++++++++++++++ .../di/probe_notification_builder_spec.rb | 41 +++++++++++++ 5 files changed, 127 insertions(+), 1 deletion(-) create mode 100644 spec/datadog/di/ext/exception_backtrace_spec.rb diff --git a/ext/libdatadog_api/di.c b/ext/libdatadog_api/di.c index 35f9e199e4d..40f800ad52e 100644 --- a/ext/libdatadog_api/di.c +++ b/ext/libdatadog_api/di.c @@ -16,6 +16,10 @@ void rb_objspace_each_objects( // from standard library exception classes like NameError. static ID id_mesg; +// The ID value of the string "bt" which is used in Ruby source as +// id_bt or idBt, and is used to set and retrieve the exception backtrace. +static ID id_bt; + // Returns whether the argument is an IMEMO of type ISEQ. 
static bool ddtrace_imemo_iseq_p(VALUE v) { return rb_objspace_internal_object_p(v) && RB_TYPE_P(v, T_IMEMO) && ddtrace_imemo_type(v) == IMEMO_TYPE_ISEQ; @@ -70,10 +74,30 @@ static VALUE exception_message(DDTRACE_UNUSED VALUE _self, VALUE exception) { return rb_ivar_get(exception, id_mesg); } +/* + * call-seq: + * DI.exception_backtrace(exception) -> Array | nil + * + * Returns the raw backtrace stored on the exception object without + * invoking any Ruby-level method. + * + * This reads the internal +bt+ instance variable directly, bypassing + * any override of +Exception#backtrace+. This is important for DI + * instrumentation where we must not invoke customer code. + * + * @param exception [Exception] The exception object + * @return [Array, nil] The raw backtrace array, or nil if not set + */ +static VALUE exception_backtrace(DDTRACE_UNUSED VALUE _self, VALUE exception) { + return rb_ivar_get(exception, id_bt); +} + void di_init(VALUE datadog_module) { id_mesg = rb_intern("mesg"); + id_bt = rb_intern("bt"); VALUE di_module = rb_define_module_under(datadog_module, "DI"); rb_define_singleton_method(di_module, "all_iseqs", all_iseqs, 0); rb_define_singleton_method(di_module, "exception_message", exception_message, 1); + rb_define_singleton_method(di_module, "exception_backtrace", exception_backtrace, 1); } diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index ad6736f43c9..bbed71b1c28 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -196,7 +196,7 @@ def serialize_throwable(exception) { type: exception.class.name, message: message, - stacktrace: format_backtrace(exception.backtrace), + stacktrace: format_backtrace(DI.exception_backtrace(exception)), } end diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index 3878d708242..20e20716b02 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -10,6 +10,7 @@ module Datadog def self.all_iseqs: () 
-> Array[RubyVM::InstructionSequence] def self.file_iseqs: () -> Array[RubyVM::InstructionSequence] def self.exception_message: (Exception exception) -> untyped + def self.exception_backtrace: (Exception exception) -> Array[String]? def self.component: () -> Component diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb new file mode 100644 index 00000000000..cd24c9975c3 --- /dev/null +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -0,0 +1,60 @@ +require "datadog/di/spec_helper" + +RSpec.describe 'exception_backtrace' do + subject(:backtrace) do + Datadog::DI.exception_backtrace(exception) + end + + context 'when exception has a backtrace' do + let(:exception) do + raise StandardError, 'test' + rescue => e + e + end + + it 'returns an array of strings' do + expect(backtrace).to be_an(Array) + expect(backtrace).not_to be_empty + expect(backtrace.first).to be_a(String) + expect(backtrace.first).to match(/\A.+:\d+:in\s/) + end + end + + context 'when exception has no backtrace' do + let(:exception) do + StandardError.new('no backtrace') + end + + it 'returns nil' do + expect(backtrace).to be_nil + end + end + + context 'when exception class overrides backtrace method' do + let(:exception_class) do + Class.new(StandardError) do + define_method(:backtrace) do + ['overridden'] + end + end + end + + let(:exception) do + begin + raise exception_class, 'test' + rescue => e + e + end + end + + it 'returns the real backtrace, not the overridden one' do + # The raw backtrace from the C extension bypasses the override. + expect(backtrace).to be_an(Array) + expect(backtrace).not_to eq(['overridden']) + expect(backtrace.first).to match(/\A.+:\d+:in\s/) + + # Verify the override exists on the Ruby side. 
+ expect(exception.backtrace).to eq(['overridden']) + end + end +end diff --git a/spec/datadog/di/probe_notification_builder_spec.rb b/spec/datadog/di/probe_notification_builder_spec.rb index e3451a44c8c..8868929bac2 100644 --- a/spec/datadog/di/probe_notification_builder_spec.rb +++ b/spec/datadog/di/probe_notification_builder_spec.rb @@ -565,6 +565,47 @@ end end + context 'when exception has overridden backtrace method' do + let(:exception_class) do + Class.new(StandardError) do + define_method(:backtrace) do + ['overridden:0:in `fake_method\''] + end + end + end + + let(:exception) do + begin + raise exception_class, 'test' + rescue => e + e + end + end + + let(:context) do + Datadog::DI::Context.new( + probe: probe, + settings: settings, serializer: serializer, + target_self: target_self, + serialized_entry_args: {}, + return_value: nil, duration: 0.1, + exception: exception, + ) + end + + let(:payload) { builder.build_executed(context) } + + it 'uses raw backtrace, not overridden backtrace method' do + throwable = payload.dig(:debugger, :snapshot, :captures, :return, :throwable) + expect(throwable[:stacktrace]).to be_an(Array) + expect(throwable[:stacktrace]).not_to eq( + [{fileName: 'overridden', function: 'fake_method', lineNumber: 0}], + ) + # Verify the override exists on the Ruby side + expect(exception.backtrace).to eq(['overridden:0:in `fake_method\'']) + end + end + context 'when exception constructor argument is not a string' do let(:exception) { NameError.new(42) } From 59efad8fea37cfa33715f3bfa31a5180b0c43ec0 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 14:32:53 -0400 Subject: [PATCH 02/16] Fix exception_backtrace to convert Thread::Backtrace to Array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The raw `bt` ivar on exceptions stores a Thread::Backtrace object, not an Array. Ruby's Exception#backtrace converts it lazily via rb_backtrace_to_str_ary. 
On newer Ruby versions, `bt` may even be nil with the actual data in `bt_locations` (lazy evaluation). The original implementation returned the raw ivar value, which caused: - Thread::Backtrace returned instead of Array (broke format_backtrace) - nil returned for raised exceptions on newer Ruby (lazy evaluation) Fix by replicating Ruby's conversion logic: 1. If bt is Array (set via set_backtrace), return as-is 2. If bt is Thread::Backtrace, convert via rb_backtrace_to_str_ary 3. If bt is nil, check bt_locations and convert if present rb_backtrace_p and rb_backtrace_to_str_ary are Ruby internal C functions (vm_backtrace.c), not customer code — safe to call from DI instrumentation context. Co-Authored-By: Claude --- ext/libdatadog_api/di.c | 70 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/ext/libdatadog_api/di.c b/ext/libdatadog_api/di.c index 40f800ad52e..3d025e9696f 100644 --- a/ext/libdatadog_api/di.c +++ b/ext/libdatadog_api/di.c @@ -9,6 +9,12 @@ void rb_objspace_each_objects( int (*callback)(void *start, void *end, size_t stride, void *data), void *data); +// Backtrace conversion functions from vm_backtrace.c. +// rb_backtrace_p returns true if the value is a Thread::Backtrace object. +// rb_backtrace_to_str_ary converts a Thread::Backtrace to Array. +int rb_backtrace_p(VALUE obj); +VALUE rb_backtrace_to_str_ary(VALUE self); + #define IMEMO_TYPE_ISEQ 7 // The ID value of the string "mesg" which is used in Ruby source as @@ -20,6 +26,12 @@ static ID id_mesg; // id_bt or idBt, and is used to set and retrieve the exception backtrace. static ID id_bt; +// The ID value of the string "bt_locations" which is used in Ruby source +// to store the Thread::Backtrace object for lazy backtrace evaluation. +// On newer Ruby versions, bt may be nil with the actual backtrace stored +// in bt_locations instead. +static ID id_bt_locations; + // Returns whether the argument is an IMEMO of type ISEQ. 
static bool ddtrace_imemo_iseq_p(VALUE v) { return rb_objspace_internal_object_p(v) && RB_TYPE_P(v, T_IMEMO) && ddtrace_imemo_type(v) == IMEMO_TYPE_ISEQ; @@ -78,23 +90,67 @@ static VALUE exception_message(DDTRACE_UNUSED VALUE _self, VALUE exception) { * call-seq: * DI.exception_backtrace(exception) -> Array | nil * - * Returns the raw backtrace stored on the exception object without - * invoking any Ruby-level method. + * Returns the backtrace stored on the exception object as an Array of + * Strings, without invoking any Ruby-level method on the exception. + * + * This reads the internal +bt+ and +bt_locations+ instance variables + * directly, bypassing any override of +Exception#backtrace+. This is + * important for DI instrumentation where we must not invoke customer code. + * + * Ruby stores the backtrace internally as a Thread::Backtrace object, + * not as an Array of Strings. The public Exception#backtrace method + * converts it lazily. This function performs the same conversion using + * rb_backtrace_to_str_ary (a Ruby internal C function, not customer code). + * + * Ruby version differences in internal backtrace storage: + * + * - Ruby 2.6: When +raise+ is called, Ruby sets +bt+ to a + * Thread::Backtrace object. +Exception#backtrace+ converts it to + * Array on first access and caches the result back in +bt+. + * + * - Ruby 3.2+: When +raise+ is called, Ruby sets +bt+ to +nil+ and + * stores the Thread::Backtrace in +bt_locations+ instead (lazy + * evaluation). +Exception#backtrace+ reads +bt_locations+, converts + * to Array, and caches in +bt+. * - * This reads the internal +bt+ instance variable directly, bypassing - * any override of +Exception#backtrace+. This is important for DI - * instrumentation where we must not invoke customer code. + * - All versions: +Exception#set_backtrace+ stores an Array + * directly in +bt+ (no Thread::Backtrace involved). 
* * @param exception [Exception] The exception object - * @return [Array, nil] The raw backtrace array, or nil if not set + * @return [Array, nil] The backtrace as an array of strings, + * or nil if no backtrace is set */ static VALUE exception_backtrace(DDTRACE_UNUSED VALUE _self, VALUE exception) { - return rb_ivar_get(exception, id_bt); + VALUE bt = rb_ivar_get(exception, id_bt); + + // Array: backtrace was set via Exception#set_backtrace, or was already + // materialized by a prior call to Exception#backtrace. All Ruby versions. + if (RB_TYPE_P(bt, T_ARRAY)) return bt; + + // Thread::Backtrace: Ruby 2.6–3.1 store the raw backtrace object in bt + // when raise is called, before Exception#backtrace materializes it. + if (rb_backtrace_p(bt)) { + return rb_backtrace_to_str_ary(bt); + } + + // nil: On Ruby 3.2+, bt starts as nil after raise. The actual backtrace + // is stored in bt_locations as a Thread::Backtrace (lazy evaluation). + // Also nil when no backtrace has been set (e.g. Exception.new without raise). + if (NIL_P(bt)) { + VALUE bt_locations = rb_ivar_get(exception, id_bt_locations); + if (!NIL_P(bt_locations) && rb_backtrace_p(bt_locations)) { + return rb_backtrace_to_str_ary(bt_locations); + } + } + + // No backtrace set (exception created without raise and without set_backtrace). 
+ return Qnil; } void di_init(VALUE datadog_module) { id_mesg = rb_intern("mesg"); id_bt = rb_intern("bt"); + id_bt_locations = rb_intern("bt_locations"); VALUE di_module = rb_define_module_under(datadog_module, "DI"); rb_define_singleton_method(di_module, "all_iseqs", all_iseqs, 0); From 9b777e4d0f61a493f71ab5ec527b490302b7da5b Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 14:36:26 -0400 Subject: [PATCH 03/16] Fix StandardRB: remove redundant begin blocks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Style/RedundantBegin: the begin/rescue inside do/end blocks is redundant — the block itself can contain rescue directly. Co-Authored-By: Claude --- spec/datadog/di/ext/exception_backtrace_spec.rb | 8 +++----- spec/datadog/di/probe_notification_builder_spec.rb | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index cd24c9975c3..d867a06c8ce 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -40,11 +40,9 @@ end let(:exception) do - begin - raise exception_class, 'test' - rescue => e - e - end + raise exception_class, 'test' + rescue => e + e end it 'returns the real backtrace, not the overridden one' do diff --git a/spec/datadog/di/probe_notification_builder_spec.rb b/spec/datadog/di/probe_notification_builder_spec.rb index 8868929bac2..6d6d4314a2b 100644 --- a/spec/datadog/di/probe_notification_builder_spec.rb +++ b/spec/datadog/di/probe_notification_builder_spec.rb @@ -575,11 +575,9 @@ end let(:exception) do - begin - raise exception_class, 'test' - rescue => e - e - end + raise exception_class, 'test' + rescue => e + e end let(:context) do From 5b5eb0ba7aa90b685253467f13d59ef8b299b487 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 14:42:20 -0400 Subject: [PATCH 04/16] Add set_backtrace test and fix formatting 
in specs Add test for the Array code path in the C extension, exercised when Exception#set_backtrace has been called. This covers the RB_TYPE_P(bt, T_ARRAY) early return that wasn't previously tested. Also fix formatting: split keyword args onto separate lines for consistency in probe_notification_builder_spec.rb. Co-Authored-By: Claude --- spec/datadog/di/ext/exception_backtrace_spec.rb | 12 ++++++++++++ spec/datadog/di/probe_notification_builder_spec.rb | 6 ++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index d867a06c8ce..32ab6589b21 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -30,6 +30,18 @@ end end + context 'when backtrace was set via set_backtrace' do + let(:exception) do + StandardError.new('test').tap do |e| + e.set_backtrace(['custom:1:in `foo\'', 'custom:2:in `bar\'']) + end + end + + it 'returns the set backtrace array' do + expect(backtrace).to eq(['custom:1:in `foo\'', 'custom:2:in `bar\'']) + end + end + context 'when exception class overrides backtrace method' do let(:exception_class) do Class.new(StandardError) do diff --git a/spec/datadog/di/probe_notification_builder_spec.rb b/spec/datadog/di/probe_notification_builder_spec.rb index 6d6d4314a2b..f995d49cf7f 100644 --- a/spec/datadog/di/probe_notification_builder_spec.rb +++ b/spec/datadog/di/probe_notification_builder_spec.rb @@ -583,10 +583,12 @@ let(:context) do Datadog::DI::Context.new( probe: probe, - settings: settings, serializer: serializer, + settings: settings, + serializer: serializer, target_self: target_self, serialized_entry_args: {}, - return_value: nil, duration: 0.1, + return_value: nil, + duration: 0.1, exception: exception, ) end From 9801c9989c7dc3c055bbe8cda7cb4452c4c7b83d Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 17:54:22 -0400 Subject: [PATCH 05/16] Fix undefined 
symbol: use UnboundMethod instead of internal Ruby functions rb_backtrace_p and rb_backtrace_to_str_ary are not exported symbols in Ruby's shared library, causing "undefined symbol: rb_backtrace_p" at runtime on all Ruby versions. Replace with UnboundMethod approach: capture Exception.instance_method(:backtrace) once at init time, then use bind+call to invoke the original C implementation on any exception. This bypasses customer overrides (the UnboundMethod is captured from Exception itself) while using only public Ruby API. Uses bind + call (not bind_call) for Ruby 2.6 compatibility. The UnboundMethod is registered with rb_gc_register_mark_object to prevent GC collection. Co-Authored-By: Claude --- ext/libdatadog_api/di.c | 102 +++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 60 deletions(-) diff --git a/ext/libdatadog_api/di.c b/ext/libdatadog_api/di.c index 3d025e9696f..90edfb8fcf0 100644 --- a/ext/libdatadog_api/di.c +++ b/ext/libdatadog_api/di.c @@ -9,12 +9,6 @@ void rb_objspace_each_objects( int (*callback)(void *start, void *end, size_t stride, void *data), void *data); -// Backtrace conversion functions from vm_backtrace.c. -// rb_backtrace_p returns true if the value is a Thread::Backtrace object. -// rb_backtrace_to_str_ary converts a Thread::Backtrace to Array. -int rb_backtrace_p(VALUE obj); -VALUE rb_backtrace_to_str_ary(VALUE self); - #define IMEMO_TYPE_ISEQ 7 // The ID value of the string "mesg" which is used in Ruby source as @@ -22,15 +16,10 @@ VALUE rb_backtrace_to_str_ary(VALUE self); // from standard library exception classes like NameError. static ID id_mesg; -// The ID value of the string "bt" which is used in Ruby source as -// id_bt or idBt, and is used to set and retrieve the exception backtrace. -static ID id_bt; - -// The ID value of the string "bt_locations" which is used in Ruby source -// to store the Thread::Backtrace object for lazy backtrace evaluation. 
-// On newer Ruby versions, bt may be nil with the actual backtrace stored -// in bt_locations instead. -static ID id_bt_locations; +// Cached UnboundMethod for Exception#backtrace, used to call the original +// C implementation without dispatching through the method table (which +// would invoke customer overrides). Initialized once in di_init. +static VALUE exception_backtrace_unbound_method; // Returns whether the argument is an IMEMO of type ISEQ. static bool ddtrace_imemo_iseq_p(VALUE v) { @@ -90,67 +79,60 @@ static VALUE exception_message(DDTRACE_UNUSED VALUE _self, VALUE exception) { * call-seq: * DI.exception_backtrace(exception) -> Array | nil * - * Returns the backtrace stored on the exception object as an Array of - * Strings, without invoking any Ruby-level method on the exception. + * Returns the backtrace of the exception as an Array of Strings, without + * invoking any Ruby-level method on the exception object itself. * - * This reads the internal +bt+ and +bt_locations+ instance variables - * directly, bypassing any override of +Exception#backtrace+. This is - * important for DI instrumentation where we must not invoke customer code. + * This is important for DI instrumentation where we must not invoke + * customer code. If a customer subclass overrides +Exception#backtrace+, + * calling +exception.backtrace+ would dispatch to the override. This + * method bypasses that by calling the original +Exception#backtrace+ + * implementation directly via an UnboundMethod captured at init time. * - * Ruby stores the backtrace internally as a Thread::Backtrace object, - * not as an Array of Strings. The public Exception#backtrace method - * converts it lazily. This function performs the same conversion using - * rb_backtrace_to_str_ary (a Ruby internal C function, not customer code). + * Implementation: at init time, we capture + * +Exception.instance_method(:backtrace)+ as an UnboundMethod. At call + * time, we bind it to the exception and call it. 
This invokes the + * original C implementation of +Exception#backtrace+ (defined in + * Ruby's error.c), which handles all Ruby version differences in + * internal backtrace storage: * - * Ruby version differences in internal backtrace storage: + * - Ruby 2.6–3.1: +bt+ ivar holds a Thread::Backtrace object after + * +raise+. +Exception#backtrace+ converts it to Array. * - * - Ruby 2.6: When +raise+ is called, Ruby sets +bt+ to a - * Thread::Backtrace object. +Exception#backtrace+ converts it to - * Array on first access and caches the result back in +bt+. + * - Ruby 3.2+: +bt+ is nil after +raise+; actual backtrace is in + * +bt_locations+. +Exception#backtrace+ reads and converts it. * - * - Ruby 3.2+: When +raise+ is called, Ruby sets +bt+ to +nil+ and - * stores the Thread::Backtrace in +bt_locations+ instead (lazy - * evaluation). +Exception#backtrace+ reads +bt_locations+, converts - * to Array, and caches in +bt+. + * - All versions: +Exception#set_backtrace+ stores Array + * directly in +bt+. * - * - All versions: +Exception#set_backtrace+ stores an Array - * directly in +bt+ (no Thread::Backtrace involved). + * Using bind+call on the UnboundMethod is safe: it only invokes Ruby + * stdlib code (the original Exception#backtrace C function), not + * customer code. The UnboundMethod is captured once from Exception + * itself, so even if a subclass overrides backtrace, bind_call still + * dispatches to the original. * * @param exception [Exception] The exception object * @return [Array, nil] The backtrace as an array of strings, * or nil if no backtrace is set */ static VALUE exception_backtrace(DDTRACE_UNUSED VALUE _self, VALUE exception) { - VALUE bt = rb_ivar_get(exception, id_bt); - - // Array: backtrace was set via Exception#set_backtrace, or was already - // materialized by a prior call to Exception#backtrace. All Ruby versions. 
- if (RB_TYPE_P(bt, T_ARRAY)) return bt; - - // Thread::Backtrace: Ruby 2.6–3.1 store the raw backtrace object in bt - // when raise is called, before Exception#backtrace materializes it. - if (rb_backtrace_p(bt)) { - return rb_backtrace_to_str_ary(bt); - } - - // nil: On Ruby 3.2+, bt starts as nil after raise. The actual backtrace - // is stored in bt_locations as a Thread::Backtrace (lazy evaluation). - // Also nil when no backtrace has been set (e.g. Exception.new without raise). - if (NIL_P(bt)) { - VALUE bt_locations = rb_ivar_get(exception, id_bt_locations); - if (!NIL_P(bt_locations) && rb_backtrace_p(bt_locations)) { - return rb_backtrace_to_str_ary(bt_locations); - } - } - - // No backtrace set (exception created without raise and without set_backtrace). - return Qnil; + // Use bind + call (not bind_call) for Ruby 2.6 compatibility. + // bind_call was added in Ruby 2.7. + VALUE bound = rb_funcall(exception_backtrace_unbound_method, + rb_intern("bind"), 1, exception); + return rb_funcall(bound, rb_intern("call"), 0); } void di_init(VALUE datadog_module) { id_mesg = rb_intern("mesg"); - id_bt = rb_intern("bt"); - id_bt_locations = rb_intern("bt_locations"); + + // Capture Exception.instance_method(:backtrace) once at init time. + // This UnboundMethod points to the original C implementation in error.c + // and will not be affected by subclass overrides. + exception_backtrace_unbound_method = rb_funcall( + rb_eException, rb_intern("instance_method"), 1, + ID2SYM(rb_intern("backtrace"))); + // Prevent GC from collecting the cached UnboundMethod. 
+ rb_gc_register_mark_object(exception_backtrace_unbound_method); VALUE di_module = rb_define_module_under(datadog_module, "DI"); rb_define_singleton_method(di_module, "all_iseqs", all_iseqs, 0); From a1c75f40d09b8609776421412345797500a8277f Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 18:12:46 -0400 Subject: [PATCH 06/16] Fix undefined symbol: use have_func to gate rb_backtrace_p MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rb_backtrace_p and rb_backtrace_to_str_ary are internal Ruby functions (vm_backtrace.c) that may not be exported as dynamic symbols. An earlier commit in this series (PATCH 02/16) declared prototypes manually, which compiled but failed at runtime with "undefined symbol: rb_backtrace_p" on all Ruby versions. Fix: use have_func('rb_backtrace_p') in extconf.rb to detect symbol availability at compile time. When available, read the bt ivar directly and convert via rb_backtrace_to_str_ary — no Ruby method dispatch at all. When unavailable, fall back to calling Exception#backtrace via an UnboundMethod captured from Exception at init time, which invokes the original exc_backtrace (error.c) regardless of subclass overrides. The bt ivar after raise holds a Thread::Backtrace object. Ruby's exc_backtrace converts it to Array via rb_backtrace_to_str_ary. If set via Exception#set_backtrace, bt already holds an Array. Co-Authored-By: Claude --- ext/libdatadog_api/di.c | 80 +++++++++++++++++++++++------------ ext/libdatadog_api/extconf.rb | 2 + 2 files changed, 55 insertions(+), 27 deletions(-) diff --git a/ext/libdatadog_api/di.c b/ext/libdatadog_api/di.c index 90edfb8fcf0..b7c464ce83e 100644 --- a/ext/libdatadog_api/di.c +++ b/ext/libdatadog_api/di.c @@ -9,6 +9,14 @@ void rb_objspace_each_objects( int (*callback)(void *start, void *end, size_t stride, void *data), void *data); +#ifdef HAVE_RB_BACKTRACE_P +// Backtrace conversion functions from vm_backtrace.c. 
+// Only available on Ruby builds that export these symbols (detected +// by have_func in extconf.rb). +int rb_backtrace_p(VALUE obj); +VALUE rb_backtrace_to_str_ary(VALUE self); +#endif + #define IMEMO_TYPE_ISEQ 7 // The ID value of the string "mesg" which is used in Ruby source as @@ -16,10 +24,17 @@ void rb_objspace_each_objects( // from standard library exception classes like NameError. static ID id_mesg; -// Cached UnboundMethod for Exception#backtrace, used to call the original -// C implementation without dispatching through the method table (which -// would invoke customer overrides). Initialized once in di_init. +// The ID value of the string "bt" which is used in Ruby source as +// id_bt or idBt, and is used to set and retrieve the exception backtrace. +static ID id_bt; + +#ifndef HAVE_RB_BACKTRACE_P +// Fallback: cached UnboundMethod for Exception#backtrace, used when +// rb_backtrace_p/rb_backtrace_to_str_ary are not exported by Ruby. +// Calls the original C implementation without dispatching through the +// method table (which would invoke customer overrides). static VALUE exception_backtrace_unbound_method; +#endif // Returns whether the argument is an IMEMO of type ISEQ. static bool ddtrace_imemo_iseq_p(VALUE v) { @@ -80,59 +95,70 @@ static VALUE exception_message(DDTRACE_UNUSED VALUE _self, VALUE exception) { * DI.exception_backtrace(exception) -> Array | nil * * Returns the backtrace of the exception as an Array of Strings, without - * invoking any Ruby-level method on the exception object itself. + * dispatching through the exception's method table. * * This is important for DI instrumentation where we must not invoke * customer code. If a customer subclass overrides +Exception#backtrace+, - * calling +exception.backtrace+ would dispatch to the override. This - * method bypasses that by calling the original +Exception#backtrace+ - * implementation directly via an UnboundMethod captured at init time. 
+ * calling +exception.backtrace+ would dispatch to the override. * - * Implementation: at init time, we capture - * +Exception.instance_method(:backtrace)+ as an UnboundMethod. At call - * time, we bind it to the exception and call it. This invokes the - * original C implementation of +Exception#backtrace+ (defined in - * Ruby's error.c), which handles all Ruby version differences in - * internal backtrace storage: + * Two strategies, selected at compile time by have_func: * - * - Ruby 2.6–3.1: +bt+ ivar holds a Thread::Backtrace object after - * +raise+. +Exception#backtrace+ converts it to Array. + * 1. If rb_backtrace_p is exported: read the +bt+ ivar directly and + * convert via rb_backtrace_to_str_ary. No Ruby method dispatch at all. * - * - Ruby 3.2+: +bt+ is nil after +raise+; actual backtrace is in - * +bt_locations+. +Exception#backtrace+ reads and converts it. + * 2. Fallback: call Exception#backtrace via an UnboundMethod captured + * from Exception at init time. This invokes the original C + * implementation (exc_backtrace in error.c) regardless of subclass + * overrides. Uses bind+call (not bind_call) for Ruby 2.6 compat. * - * - All versions: +Exception#set_backtrace+ stores Array - * directly in +bt+. + * In both cases, only Ruby stdlib C code executes — never customer code. * - * Using bind+call on the UnboundMethod is safe: it only invokes Ruby - * stdlib code (the original Exception#backtrace C function), not - * customer code. The UnboundMethod is captured once from Exception - * itself, so even if a subclass overrides backtrace, bind_call still - * dispatches to the original. + * The +bt+ ivar after +raise+ contains a Thread::Backtrace object. + * Ruby's exc_backtrace (error.c) converts it to Array via + * rb_backtrace_to_str_ary (vm_backtrace.c). If set via + * +Exception#set_backtrace+, +bt+ already holds an Array. 
* * @param exception [Exception] The exception object * @return [Array, nil] The backtrace as an array of strings, * or nil if no backtrace is set */ static VALUE exception_backtrace(DDTRACE_UNUSED VALUE _self, VALUE exception) { - // Use bind + call (not bind_call) for Ruby 2.6 compatibility. - // bind_call was added in Ruby 2.7. +#ifdef HAVE_RB_BACKTRACE_P + VALUE bt = rb_ivar_get(exception, id_bt); + + // Array: set via Exception#set_backtrace, or already materialized + // by a prior call to Exception#backtrace. + if (RB_TYPE_P(bt, T_ARRAY)) return bt; + + // Thread::Backtrace: raw backtrace object stored by raise. + // Convert to Array via rb_backtrace_to_str_ary. + if (rb_backtrace_p(bt)) { + return rb_backtrace_to_str_ary(bt); + } + + // nil: no backtrace set (Exception.new without raise). + return Qnil; +#else + // Fallback: call the original Exception#backtrace via UnboundMethod. VALUE bound = rb_funcall(exception_backtrace_unbound_method, rb_intern("bind"), 1, exception); return rb_funcall(bound, rb_intern("call"), 0); +#endif } void di_init(VALUE datadog_module) { id_mesg = rb_intern("mesg"); + id_bt = rb_intern("bt"); +#ifndef HAVE_RB_BACKTRACE_P // Capture Exception.instance_method(:backtrace) once at init time. // This UnboundMethod points to the original C implementation in error.c // and will not be affected by subclass overrides. exception_backtrace_unbound_method = rb_funcall( rb_eException, rb_intern("instance_method"), 1, ID2SYM(rb_intern("backtrace"))); - // Prevent GC from collecting the cached UnboundMethod. 
rb_gc_register_mark_object(exception_backtrace_unbound_method); +#endif VALUE di_module = rb_define_module_under(datadog_module, "DI"); rb_define_singleton_method(di_module, "all_iseqs", all_iseqs, 0); diff --git a/ext/libdatadog_api/extconf.rb b/ext/libdatadog_api/extconf.rb index 475b9daa615..3661d9a8b55 100644 --- a/ext/libdatadog_api/extconf.rb +++ b/ext/libdatadog_api/extconf.rb @@ -89,6 +89,8 @@ def skip_building_extension!(reason) # When requiring, we need to use the exact same string, including the version and the platform. EXTENSION_NAME = "libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}".freeze +have_func('rb_backtrace_p') + create_makefile(EXTENSION_NAME) # rubocop:enable Style/GlobalVars From 4f8e5038130bb7981541c6db59a69ff207929b78 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 19:11:46 -0400 Subject: [PATCH 07/16] Replace C exception_backtrace with Ruby UnboundMethod + backtrace_locations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove all C code for exception backtrace (rb_backtrace_p, have_func guard, UnboundMethod fallback in di.c). The conversion functions (rb_backtrace_to_str_ary, rb_backtrace_to_location_ary) are not exported from libruby.so due to missing RUBY_SYMBOL_EXPORT markers in internal/vm.h. Reimplementing via private VM headers is correct but too much work for the gain. Instead, capture Exception.instance_method(:backtrace_locations) as an UnboundMethod at load time. bind(exception).call bypasses subclass overrides — the practical threat model. Does not protect against monkeypatching Exception itself before dd-trace-rb loads. Switch from backtrace (Array<String>) to backtrace_locations (Array<Thread::Backtrace::Location>). DI was regex-parsing the formatted strings back into path/lineno/label — a pointless round-trip. Location objects provide these directly. backtrace_locations is available on every Ruby version DI supports (2.6+). 
Co-Authored-By: Claude Sonnet 4.6 --- ext/libdatadog_api/di.c | 88 ------------------- ext/libdatadog_api/extconf.rb | 2 - lib/datadog/di.rb | 34 +++++++ lib/datadog/di/probe_notification_builder.rb | 28 +++--- .../di/ext/exception_backtrace_spec.rb | 33 +++---- 5 files changed, 56 insertions(+), 129 deletions(-) diff --git a/ext/libdatadog_api/di.c b/ext/libdatadog_api/di.c index b7c464ce83e..35f9e199e4d 100644 --- a/ext/libdatadog_api/di.c +++ b/ext/libdatadog_api/di.c @@ -9,14 +9,6 @@ void rb_objspace_each_objects( int (*callback)(void *start, void *end, size_t stride, void *data), void *data); -#ifdef HAVE_RB_BACKTRACE_P -// Backtrace conversion functions from vm_backtrace.c. -// Only available on Ruby builds that export these symbols (detected -// by have_func in extconf.rb). -int rb_backtrace_p(VALUE obj); -VALUE rb_backtrace_to_str_ary(VALUE self); -#endif - #define IMEMO_TYPE_ISEQ 7 // The ID value of the string "mesg" which is used in Ruby source as @@ -24,18 +16,6 @@ VALUE rb_backtrace_to_str_ary(VALUE self); // from standard library exception classes like NameError. static ID id_mesg; -// The ID value of the string "bt" which is used in Ruby source as -// id_bt or idBt, and is used to set and retrieve the exception backtrace. -static ID id_bt; - -#ifndef HAVE_RB_BACKTRACE_P -// Fallback: cached UnboundMethod for Exception#backtrace, used when -// rb_backtrace_p/rb_backtrace_to_str_ary are not exported by Ruby. -// Calls the original C implementation without dispatching through the -// method table (which would invoke customer overrides). -static VALUE exception_backtrace_unbound_method; -#endif - // Returns whether the argument is an IMEMO of type ISEQ. 
static bool ddtrace_imemo_iseq_p(VALUE v) { return rb_objspace_internal_object_p(v) && RB_TYPE_P(v, T_IMEMO) && ddtrace_imemo_type(v) == IMEMO_TYPE_ISEQ; @@ -90,78 +70,10 @@ static VALUE exception_message(DDTRACE_UNUSED VALUE _self, VALUE exception) { return rb_ivar_get(exception, id_mesg); } -/* - * call-seq: - * DI.exception_backtrace(exception) -> Array | nil - * - * Returns the backtrace of the exception as an Array of Strings, without - * dispatching through the exception's method table. - * - * This is important for DI instrumentation where we must not invoke - * customer code. If a customer subclass overrides +Exception#backtrace+, - * calling +exception.backtrace+ would dispatch to the override. - * - * Two strategies, selected at compile time by have_func: - * - * 1. If rb_backtrace_p is exported: read the +bt+ ivar directly and - * convert via rb_backtrace_to_str_ary. No Ruby method dispatch at all. - * - * 2. Fallback: call Exception#backtrace via an UnboundMethod captured - * from Exception at init time. This invokes the original C - * implementation (exc_backtrace in error.c) regardless of subclass - * overrides. Uses bind+call (not bind_call) for Ruby 2.6 compat. - * - * In both cases, only Ruby stdlib C code executes — never customer code. - * - * The +bt+ ivar after +raise+ contains a Thread::Backtrace object. - * Ruby's exc_backtrace (error.c) converts it to Array via - * rb_backtrace_to_str_ary (vm_backtrace.c). If set via - * +Exception#set_backtrace+, +bt+ already holds an Array. - * - * @param exception [Exception] The exception object - * @return [Array, nil] The backtrace as an array of strings, - * or nil if no backtrace is set - */ -static VALUE exception_backtrace(DDTRACE_UNUSED VALUE _self, VALUE exception) { -#ifdef HAVE_RB_BACKTRACE_P - VALUE bt = rb_ivar_get(exception, id_bt); - - // Array: set via Exception#set_backtrace, or already materialized - // by a prior call to Exception#backtrace. 
- if (RB_TYPE_P(bt, T_ARRAY)) return bt; - - // Thread::Backtrace: raw backtrace object stored by raise. - // Convert to Array via rb_backtrace_to_str_ary. - if (rb_backtrace_p(bt)) { - return rb_backtrace_to_str_ary(bt); - } - - // nil: no backtrace set (Exception.new without raise). - return Qnil; -#else - // Fallback: call the original Exception#backtrace via UnboundMethod. - VALUE bound = rb_funcall(exception_backtrace_unbound_method, - rb_intern("bind"), 1, exception); - return rb_funcall(bound, rb_intern("call"), 0); -#endif -} - void di_init(VALUE datadog_module) { id_mesg = rb_intern("mesg"); - id_bt = rb_intern("bt"); - -#ifndef HAVE_RB_BACKTRACE_P - // Capture Exception.instance_method(:backtrace) once at init time. - // This UnboundMethod points to the original C implementation in error.c - // and will not be affected by subclass overrides. - exception_backtrace_unbound_method = rb_funcall( - rb_eException, rb_intern("instance_method"), 1, - ID2SYM(rb_intern("backtrace"))); - rb_gc_register_mark_object(exception_backtrace_unbound_method); -#endif VALUE di_module = rb_define_module_under(datadog_module, "DI"); rb_define_singleton_method(di_module, "all_iseqs", all_iseqs, 0); rb_define_singleton_method(di_module, "exception_message", exception_message, 1); - rb_define_singleton_method(di_module, "exception_backtrace", exception_backtrace, 1); } diff --git a/ext/libdatadog_api/extconf.rb b/ext/libdatadog_api/extconf.rb index 3661d9a8b55..475b9daa615 100644 --- a/ext/libdatadog_api/extconf.rb +++ b/ext/libdatadog_api/extconf.rb @@ -89,8 +89,6 @@ def skip_building_extension!(reason) # When requiring, we need to use the exact same string, including the version and the platform. 
EXTENSION_NAME = "libdatadog_api.#{RUBY_VERSION[/\d+.\d+/]}_#{RUBY_PLATFORM}".freeze -have_func('rb_backtrace_p') - create_makefile(EXTENSION_NAME) # rubocop:enable Style/GlobalVars diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index fbe144998aa..6cd902155fe 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -11,11 +11,45 @@ module Datadog module DI INSTRUMENTED_COUNTERS_LOCK = Mutex.new + # Captured at load time from Exception itself (not a subclass). + # Used by exception_backtrace to bypass subclass overrides of + # backtrace_locations. + # + # This does NOT protect against monkeypatching Exception#backtrace_locations + # before dd-trace-rb loads — in that case we'd capture the monkeypatch. + # The practical threat model is customer subclasses overriding the method: + # + # class MyError < StandardError + # def backtrace_locations; []; end + # end + # + # The UnboundMethod bypasses subclass overrides: bind(exception).call + # always dispatches to the original Exception implementation. + EXCEPTION_BACKTRACE_LOCATIONS = Exception.instance_method(:backtrace_locations) + class << self def enabled? Datadog.configuration.dynamic_instrumentation.enabled end + # Returns the backtrace of an exception as an Array of + # Thread::Backtrace::Location objects, without dispatching through + # the exception's method table. + # + # DI instrumentation runs inside customer application methods and + # must never invoke customer code. Calling exception.backtrace_locations + # directly would dispatch through the method table, hitting any + # subclass override. The UnboundMethod captured at load time from + # Exception itself bypasses subclass method tables entirely. + # + # Returns nil if no backtrace is set (Exception.new without raise). + # + # @param exception [Exception] + # @return [Array, nil] + def exception_backtrace(exception) + EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call + end + # Returns iseqs that correspond to loaded files (filtering out eval'd code). 
# # There are several types of iseqs returned by +all_iseqs+: diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index bbed71b1c28..0a5fe4c7534 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -56,10 +56,6 @@ def build_executed(context) NANOSECONDS = 1_000_000_000 MILLISECONDS = 1000 - # Matches Ruby backtrace frame format: "/path/file.rb:42:in `method_name'" - # Captures: $1 = file path, $2 = line number, $3 = method name - BACKTRACE_FRAME_PATTERN = /\A(.+):(\d+):in\s+[`'](.+)'\z/ - def build_snapshot(context) probe = context.probe @@ -200,22 +196,20 @@ def serialize_throwable(exception) } end - # Parses Ruby backtrace strings into the stack frame format + # Converts backtrace locations into the stack frame format # expected by the Datadog UI. # - # Ruby backtrace format: "/path/file.rb:42:in `method_name'" + # Uses Thread::Backtrace::Location objects which provide structured + # path/lineno/label directly, avoiding the round-trip of formatting + # to strings and regex-parsing back. # - # @param backtrace [Array, nil] from Exception#backtrace - # @return [Array, nil] - def format_backtrace(backtrace) - return [] if backtrace.nil? - - backtrace.map do |frame| - if frame =~ BACKTRACE_FRAME_PATTERN - {fileName: $1, function: $3, lineNumber: $2.to_i} - else - {fileName: frame, function: '', lineNumber: 0} - end + # @param locations [Array, nil] + # @return [Array] + def format_backtrace(locations) + return [] if locations.nil? 
+ + locations.map do |loc| + {fileName: loc.path, function: loc.label, lineNumber: loc.lineno} end end diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index 32ab6589b21..10faf43741f 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -12,11 +12,12 @@ e end - it 'returns an array of strings' do + it 'returns an array of Thread::Backtrace::Location' do expect(backtrace).to be_an(Array) expect(backtrace).not_to be_empty - expect(backtrace.first).to be_a(String) - expect(backtrace.first).to match(/\A.+:\d+:in\s/) + expect(backtrace.first).to be_a(Thread::Backtrace::Location) + expect(backtrace.first.path).to be_a(String) + expect(backtrace.first.lineno).to be_a(Integer) end end @@ -30,23 +31,11 @@ end end - context 'when backtrace was set via set_backtrace' do - let(:exception) do - StandardError.new('test').tap do |e| - e.set_backtrace(['custom:1:in `foo\'', 'custom:2:in `bar\'']) - end - end - - it 'returns the set backtrace array' do - expect(backtrace).to eq(['custom:1:in `foo\'', 'custom:2:in `bar\'']) - end - end - - context 'when exception class overrides backtrace method' do + context 'when exception class overrides backtrace_locations method' do let(:exception_class) do Class.new(StandardError) do - define_method(:backtrace) do - ['overridden'] + define_method(:backtrace_locations) do + [] end end end @@ -58,13 +47,13 @@ end it 'returns the real backtrace, not the overridden one' do - # The raw backtrace from the C extension bypasses the override. + # The UnboundMethod bypasses the subclass override. expect(backtrace).to be_an(Array) - expect(backtrace).not_to eq(['overridden']) - expect(backtrace.first).to match(/\A.+:\d+:in\s/) + expect(backtrace).not_to be_empty + expect(backtrace.first).to be_a(Thread::Backtrace::Location) # Verify the override exists on the Ruby side. 
- expect(exception.backtrace).to eq(['overridden']) + expect(exception.backtrace_locations).to eq([]) end end end From 02037d2407909f48718a02a5e7115562d8753008 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 19:20:54 -0400 Subject: [PATCH 08/16] Fix RBS signature: exception_backtrace returns Location not String Co-Authored-By: Claude Sonnet 4.6 --- sig/datadog/di.rbs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index 20e20716b02..7e2efe7bd11 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -10,7 +10,7 @@ module Datadog def self.all_iseqs: () -> Array[RubyVM::InstructionSequence] def self.file_iseqs: () -> Array[RubyVM::InstructionSequence] def self.exception_message: (Exception exception) -> untyped - def self.exception_backtrace: (Exception exception) -> Array[String]? + def self.exception_backtrace: (Exception exception) -> Array[Thread::Backtrace::Location]? def self.component: () -> Component From 95541ba3696849a17cbb9db36c14bae0ed8a5587 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 19:22:09 -0400 Subject: [PATCH 09/16] Inline exception_backtrace: use constant directly at call site No wrapper method needed. EXCEPTION_BACKTRACE_LOCATIONS.bind(exc).call is called directly in probe_notification_builder.rb. Co-Authored-By: Claude Sonnet 4.6 --- lib/datadog/di.rb | 21 +------------------ lib/datadog/di/probe_notification_builder.rb | 2 +- sig/datadog/di.rbs | 2 +- .../di/ext/exception_backtrace_spec.rb | 4 ++-- 4 files changed, 5 insertions(+), 24 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 6cd902155fe..f09e9d00db0 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -12,8 +12,7 @@ module DI INSTRUMENTED_COUNTERS_LOCK = Mutex.new # Captured at load time from Exception itself (not a subclass). - # Used by exception_backtrace to bypass subclass overrides of - # backtrace_locations. 
+ # Used to bypass subclass overrides of backtrace_locations. # # This does NOT protect against monkeypatching Exception#backtrace_locations # before dd-trace-rb loads — in that case we'd capture the monkeypatch. @@ -32,24 +31,6 @@ def enabled? Datadog.configuration.dynamic_instrumentation.enabled end - # Returns the backtrace of an exception as an Array of - # Thread::Backtrace::Location objects, without dispatching through - # the exception's method table. - # - # DI instrumentation runs inside customer application methods and - # must never invoke customer code. Calling exception.backtrace_locations - # directly would dispatch through the method table, hitting any - # subclass override. The UnboundMethod captured at load time from - # Exception itself bypasses subclass method tables entirely. - # - # Returns nil if no backtrace is set (Exception.new without raise). - # - # @param exception [Exception] - # @return [Array, nil] - def exception_backtrace(exception) - EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call - end - # Returns iseqs that correspond to loaded files (filtering out eval'd code). 
# # There are several types of iseqs returned by +all_iseqs+: diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index 0a5fe4c7534..c16abcab689 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -192,7 +192,7 @@ def serialize_throwable(exception) { type: exception.class.name, message: message, - stacktrace: format_backtrace(DI.exception_backtrace(exception)), + stacktrace: format_backtrace(DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call), } end diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index 7e2efe7bd11..a6ede3d9e98 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -10,7 +10,7 @@ module Datadog def self.all_iseqs: () -> Array[RubyVM::InstructionSequence] def self.file_iseqs: () -> Array[RubyVM::InstructionSequence] def self.exception_message: (Exception exception) -> untyped - def self.exception_backtrace: (Exception exception) -> Array[Thread::Backtrace::Location]? 
+ EXCEPTION_BACKTRACE_LOCATIONS: UnboundMethod def self.component: () -> Component diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index 10faf43741f..12895384d7c 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -1,8 +1,8 @@ require "datadog/di/spec_helper" -RSpec.describe 'exception_backtrace' do +RSpec.describe 'EXCEPTION_BACKTRACE_LOCATIONS' do subject(:backtrace) do - Datadog::DI.exception_backtrace(exception) + Datadog::DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call end context 'when exception has a backtrace' do From c98fb09a76242393de7af482a95fe69ea424beb9 Mon Sep 17 00:00:00 2001 From: ddsign Date: Fri, 27 Mar 2026 19:27:26 -0400 Subject: [PATCH 10/16] Fix Steep: update RBS for format_backtrace and remove BACKTRACE_FRAME_PATTERN Co-Authored-By: Claude Sonnet 4.6 --- sig/datadog/di/probe_notification_builder.rbs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sig/datadog/di/probe_notification_builder.rbs b/sig/datadog/di/probe_notification_builder.rbs index fb14c2c2f34..2fe963a68d4 100644 --- a/sig/datadog/di/probe_notification_builder.rbs +++ b/sig/datadog/di/probe_notification_builder.rbs @@ -3,7 +3,6 @@ module Datadog class ProbeNotificationBuilder NANOSECONDS: Integer MILLISECONDS: Integer - BACKTRACE_FRAME_PATTERN: Regexp @serializer: Serializer @@ -26,7 +25,7 @@ module Datadog def build_snapshot: (Context context) -> Hash[Symbol,untyped] def serialize_throwable: (Exception exception) -> Hash[Symbol, String? | Array[Hash[Symbol, String | Integer | nil]]?] - def format_backtrace: (Array[String]? backtrace) -> Array[Hash[Symbol, String | Integer | nil]] + def format_backtrace: (Array[Thread::Backtrace::Location]? locations) -> Array[Hash[Symbol, String | Integer | nil]] def build_snapshot_base: (Context context, ?evaluation_errors: Array[untyped]?, ?captures: untyped?, ?message: String?) 
-> Hash[Symbol,untyped] From 74c2f91dae56a45d42012937165d1c6a93e75594 Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 09:20:22 -0400 Subject: [PATCH 11/16] Fall back to string backtrace when backtrace_locations is nil MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit backtrace_locations returns nil when Exception#set_backtrace was called with Array<String> — the VM cannot reconstruct Location objects from formatted strings. This happens in exception wrapping patterns. Add EXCEPTION_BACKTRACE UnboundMethod (same bypass trick) and reinstate the string-parsing format_backtrace_strings as a fallback path. Co-Authored-By: Claude Opus 4.6 --- lib/datadog/di.rb | 17 ++++ lib/datadog/di/probe_notification_builder.rb | 52 ++++++++++-- sig/datadog/di.rbs | 1 + sig/datadog/di/probe_notification_builder.rbs | 4 +- .../di/ext/exception_backtrace_spec.rb | 83 +++++++++++++++++++ .../di/probe_notification_builder_spec.rb | 34 ++++++++ 6 files changed, 185 insertions(+), 6 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index f09e9d00db0..fcaad7696e3 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -26,6 +26,23 @@ module DI # always dispatches to the original Exception implementation. EXCEPTION_BACKTRACE_LOCATIONS = Exception.instance_method(:backtrace_locations) + # Same UnboundMethod trick for Exception#backtrace (Array). + # Used as a fallback when backtrace_locations returns nil — which happens + # when someone calls Exception#set_backtrace with an Array. + # + # set_backtrace accepts Array or nil. When called with strings, + # it replaces the VM-level backtrace: backtrace returns the new strings, + # but backtrace_locations returns nil because the VM cannot reconstruct + # Location objects from formatted strings. 
This occurs in exception + # wrapping patterns where a library catches an exception, creates a new + # one, and copies the original's string backtrace onto it via + # set_backtrace before re-raising. + # + # Ruby 3.4+ also allows set_backtrace(Array), which preserves + # backtrace_locations — but older Rubies and most existing code use + # the string form. + EXCEPTION_BACKTRACE = Exception.instance_method(:backtrace) + class << self def enabled? Datadog.configuration.dynamic_instrumentation.enabled diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index c16abcab689..ffd121ea97b 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -189,13 +189,35 @@ def serialize_throwable(exception) # The exception class is already reported via the :type field. '' end + # Prefer backtrace_locations (structured Location objects) over + # backtrace (formatted strings that need regex parsing). + # + # However, backtrace_locations returns nil when someone has called + # Exception#set_backtrace with Array — the VM cannot + # reconstruct Location objects from formatted strings. This happens + # in exception wrapping patterns (catch, create new exception, copy + # original's string backtrace via set_backtrace, re-raise). + # In that case, fall back to backtrace strings. + # + # Both accessors use the UnboundMethod trick to bypass subclass + # overrides, consistent with the rest of this method. 
+ locations = DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call + stacktrace = if locations + format_backtrace_locations(locations) + else + format_backtrace_strings(DI::EXCEPTION_BACKTRACE.bind(exception).call) + end { type: exception.class.name, message: message, - stacktrace: format_backtrace(DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call), + stacktrace: stacktrace, } end + # Matches Ruby backtrace frame format: "/path/file.rb:42:in `method_name'" + # Captures: $1 = file path, $2 = line number, $3 = method name + BACKTRACE_FRAME_PATTERN = /\A(.+):(\d+):in\s+[`'](.+)'\z/ + # Converts backtrace locations into the stack frame format # expected by the Datadog UI. # @@ -203,16 +225,36 @@ def serialize_throwable(exception) # path/lineno/label directly, avoiding the round-trip of formatting # to strings and regex-parsing back. # - # @param locations [Array, nil] + # @param locations [Array] # @return [Array] - def format_backtrace(locations) - return [] if locations.nil? - + def format_backtrace_locations(locations) locations.map do |loc| {fileName: loc.path, function: loc.label, lineNumber: loc.lineno} end end + # Parses Ruby backtrace strings into the stack frame format + # expected by the Datadog UI. + # + # Fallback for when backtrace_locations returns nil (see + # serialize_throwable for details on when this happens). + # + # Ruby backtrace format: "/path/file.rb:42:in `method_name'" + # + # @param backtrace [Array, nil] from Exception#backtrace + # @return [Array] + def format_backtrace_strings(backtrace) + return [] if backtrace.nil? 
+ + backtrace.map do |frame| + if frame =~ BACKTRACE_FRAME_PATTERN + {fileName: $1, function: $3, lineNumber: $2.to_i} + else + {fileName: frame, function: '', lineNumber: 0} + end + end + end + def build_snapshot_base(context, evaluation_errors: [], captures: nil, message: nil) probe = context.probe diff --git a/sig/datadog/di.rbs b/sig/datadog/di.rbs index a6ede3d9e98..e1e27763969 100644 --- a/sig/datadog/di.rbs +++ b/sig/datadog/di.rbs @@ -11,6 +11,7 @@ module Datadog def self.file_iseqs: () -> Array[RubyVM::InstructionSequence] def self.exception_message: (Exception exception) -> untyped EXCEPTION_BACKTRACE_LOCATIONS: UnboundMethod + EXCEPTION_BACKTRACE: UnboundMethod def self.component: () -> Component diff --git a/sig/datadog/di/probe_notification_builder.rbs b/sig/datadog/di/probe_notification_builder.rbs index 2fe963a68d4..55552801d0f 100644 --- a/sig/datadog/di/probe_notification_builder.rbs +++ b/sig/datadog/di/probe_notification_builder.rbs @@ -3,6 +3,7 @@ module Datadog class ProbeNotificationBuilder NANOSECONDS: Integer MILLISECONDS: Integer + BACKTRACE_FRAME_PATTERN: Regexp @serializer: Serializer @@ -25,7 +26,8 @@ module Datadog def build_snapshot: (Context context) -> Hash[Symbol,untyped] def serialize_throwable: (Exception exception) -> Hash[Symbol, String? | Array[Hash[Symbol, String | Integer | nil]]?] - def format_backtrace: (Array[Thread::Backtrace::Location]? locations) -> Array[Hash[Symbol, String | Integer | nil]] + def format_backtrace_locations: (Array[Thread::Backtrace::Location] locations) -> Array[Hash[Symbol, String | Integer | nil]] + def format_backtrace_strings: (Array[String]? backtrace) -> Array[Hash[Symbol, String | Integer | nil]] def build_snapshot_base: (Context context, ?evaluation_errors: Array[untyped]?, ?captures: untyped?, ?message: String?) 
-> Hash[Symbol,untyped] diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index 12895384d7c..463ee30da1b 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -56,4 +56,87 @@ expect(exception.backtrace_locations).to eq([]) end end + + context 'when backtrace was set via set_backtrace with strings' do + let(:exception) do + e = StandardError.new('wrapped') + e.set_backtrace(['/app/foo.rb:10:in `bar\'', '/app/baz.rb:20:in `qux\'']) + e + end + + it 'returns nil for backtrace_locations' do + # set_backtrace with Array causes backtrace_locations to + # return nil — the VM cannot reconstruct Location objects from + # formatted strings. + expect(backtrace).to be_nil + end + end +end + +RSpec.describe 'EXCEPTION_BACKTRACE' do + subject(:backtrace) do + Datadog::DI::EXCEPTION_BACKTRACE.bind(exception).call + end + + context 'when exception has a backtrace' do + let(:exception) do + raise StandardError, 'test' + rescue => e + e + end + + it 'returns an array of strings' do + expect(backtrace).to be_an(Array) + expect(backtrace).not_to be_empty + expect(backtrace.first).to be_a(String) + end + end + + context 'when exception has no backtrace' do + let(:exception) do + StandardError.new('no backtrace') + end + + it 'returns nil' do + expect(backtrace).to be_nil + end + end + + context 'when exception class overrides backtrace method' do + let(:exception_class) do + Class.new(StandardError) do + define_method(:backtrace) do + ['overridden:0:in `fake\''] + end + end + end + + let(:exception) do + raise exception_class, 'test' + rescue => e + e + end + + it 'returns the real backtrace, not the overridden one' do + expect(backtrace).to be_an(Array) + expect(backtrace).not_to be_empty + # The real backtrace contains the actual file path, not the override. 
+ expect(backtrace.first).not_to eq('overridden:0:in `fake\'') + + # Verify the override exists on the Ruby side. + expect(exception.backtrace).to eq(['overridden:0:in `fake\'']) + end + end + + context 'when backtrace was set via set_backtrace with strings' do + let(:exception) do + e = StandardError.new('wrapped') + e.set_backtrace(['/app/foo.rb:10:in `bar\'', '/app/baz.rb:20:in `qux\'']) + e + end + + it 'returns the string backtrace' do + expect(backtrace).to eq(['/app/foo.rb:10:in `bar\'', '/app/baz.rb:20:in `qux\'']) + end + end end diff --git a/spec/datadog/di/probe_notification_builder_spec.rb b/spec/datadog/di/probe_notification_builder_spec.rb index f995d49cf7f..15f6179d9b3 100644 --- a/spec/datadog/di/probe_notification_builder_spec.rb +++ b/spec/datadog/di/probe_notification_builder_spec.rb @@ -606,6 +606,40 @@ end end + context 'when backtrace was set via set_backtrace with strings' do + let(:exception) do + e = StandardError.new('wrapped') + e.set_backtrace(['/app/foo.rb:10:in `bar\'', '/app/baz.rb:20:in `qux\'']) + e + end + + let(:context) do + Datadog::DI::Context.new( + probe: probe, + settings: settings, + serializer: serializer, + target_self: target_self, + serialized_entry_args: {}, + return_value: nil, + duration: 0.1, + exception: exception, + ) + end + + let(:payload) { builder.build_executed(context) } + + it 'falls back to string backtrace parsing' do + # set_backtrace with Array causes backtrace_locations to + # return nil. serialize_throwable should fall back to parsing the + # string backtrace. 
+ throwable = payload.dig(:debugger, :snapshot, :captures, :return, :throwable) + expect(throwable[:stacktrace]).to eq([ + {fileName: '/app/foo.rb', function: 'bar', lineNumber: 10}, + {fileName: '/app/baz.rb', function: 'qux', lineNumber: 20}, + ]) + end + end + context 'when exception constructor argument is not a string' do let(:exception) { NameError.new(42) } From fbce545d6969ed4de18d683d4cc51484a47fa447 Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 09:48:35 -0400 Subject: [PATCH 12/16] Fix EXCEPTION_BACKTRACE test: UnboundMethod does not bypass overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exception.instance_method(:backtrace).bind(exception).call returns nil when the subclass overrides #backtrace — unlike backtrace_locations, the C implementation of #backtrace does not bypass Ruby-level overrides via UnboundMethod. This is acceptable because EXCEPTION_BACKTRACE is only used as a fallback for the set_backtrace-with-strings case, where no subclass override is involved. Update the test to document the actual behavior and add a comment on the constant explaining the limitation. Co-Authored-By: Claude Opus 4.6 --- lib/datadog/di.rb | 7 +++++++ spec/datadog/di/ext/exception_backtrace_spec.rb | 14 +++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index fcaad7696e3..499f0a7a612 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -41,6 +41,13 @@ module DI # Ruby 3.4+ also allows set_backtrace(Array), which preserves # backtrace_locations — but older Rubies and most existing code use # the string form. + # + # LIMITATION: Unlike EXCEPTION_BACKTRACE_LOCATIONS, this UnboundMethod + # does NOT bypass subclass overrides of #backtrace — the C implementation + # returns nil when a Ruby-level override exists. 
This is acceptable + # because this constant is only used as a fallback for the set_backtrace + # case, where no subclass override is involved. The primary protection + # (backtrace_locations via UnboundMethod) handles the override case. EXCEPTION_BACKTRACE = Exception.instance_method(:backtrace) class << self diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index 463ee30da1b..fc8ca22b34d 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -117,11 +117,15 @@ e end - it 'returns the real backtrace, not the overridden one' do - expect(backtrace).to be_an(Array) - expect(backtrace).not_to be_empty - # The real backtrace contains the actual file path, not the override. - expect(backtrace.first).not_to eq('overridden:0:in `fake\'') + it 'returns nil — UnboundMethod does NOT bypass overrides for backtrace' do + # Unlike backtrace_locations, the UnboundMethod trick does not bypass + # subclass overrides of Exception#backtrace. The C implementation + # returns nil when a Ruby-level override exists. + # + # This is acceptable because EXCEPTION_BACKTRACE is only used as a + # fallback when backtrace_locations returns nil (the set_backtrace + # with strings case), where no subclass override is involved. + expect(backtrace).to be_nil # Verify the override exists on the Ruby side. expect(exception.backtrace).to eq(['overridden:0:in `fake\'']) From e8b3e24679ef79a3776f7ce7e725c093dcc31317 Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 10:27:03 -0400 Subject: [PATCH 13/16] Explain why UnboundMethod doesn't bypass backtrace overrides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MRI's setup_exception (eval.c) calls rb_get_backtrace during raise, which checks rb_method_basic_definition_p for #backtrace overrides. 
When an override exists, it calls the override, gets a non-nil result, and skips storing the real VM backtrace in @bt and @bt_locations. The C function exc_backtrace then reads @bt (still nil from exc_init) and returns nil. setup_exception does NOT check for #backtrace_locations overrides — only #backtrace. So the EXCEPTION_BACKTRACE_LOCATIONS UnboundMethod works because the real backtrace is always stored in @bt_locations (unless #backtrace is overridden). Co-Authored-By: Claude Opus 4.6 --- lib/datadog/di.rb | 20 ++++++++++++++----- .../di/ext/exception_backtrace_spec.rb | 8 ++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 499f0a7a612..04b5a9bcf32 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -43,11 +43,21 @@ module DI # the string form. # # LIMITATION: Unlike EXCEPTION_BACKTRACE_LOCATIONS, this UnboundMethod - # does NOT bypass subclass overrides of #backtrace — the C implementation - # returns nil when a Ruby-level override exists. This is acceptable - # because this constant is only used as a fallback for the set_backtrace - # case, where no subclass override is involved. The primary protection - # (backtrace_locations via UnboundMethod) handles the override case. + # does NOT bypass subclass overrides of #backtrace. When a subclass + # overrides #backtrace, MRI's setup_exception (eval.c) calls the + # override via rb_get_backtrace, gets a non-nil result, and skips + # storing the real VM backtrace in @bt and @bt_locations entirely. + # The C function exc_backtrace then reads @bt (still nil from + # exc_init) and returns nil. + # + # By contrast, setup_exception only checks for #backtrace overrides, + # not #backtrace_locations overrides. So when only backtrace_locations + # is overridden, the real backtrace IS stored, and the UnboundMethod + # for backtrace_locations reads it directly from @bt_locations. 
+ # + # This limitation is acceptable because this constant is only used as + # a fallback for the set_backtrace-with-strings case, where no + # subclass override is involved. EXCEPTION_BACKTRACE = Exception.instance_method(:backtrace) class << self diff --git a/spec/datadog/di/ext/exception_backtrace_spec.rb b/spec/datadog/di/ext/exception_backtrace_spec.rb index fc8ca22b34d..905cb30249f 100644 --- a/spec/datadog/di/ext/exception_backtrace_spec.rb +++ b/spec/datadog/di/ext/exception_backtrace_spec.rb @@ -119,8 +119,12 @@ it 'returns nil — UnboundMethod does NOT bypass overrides for backtrace' do # Unlike backtrace_locations, the UnboundMethod trick does not bypass - # subclass overrides of Exception#backtrace. The C implementation - # returns nil when a Ruby-level override exists. + # subclass overrides of Exception#backtrace. MRI's setup_exception + # (eval.c) calls rb_get_backtrace during raise, which detects the + # #backtrace override and calls it. Since the override returns + # non-nil, setup_exception skips storing the real VM backtrace in + # @bt entirely. The C function exc_backtrace then reads @bt (still + # nil from exc_init) and returns nil. # # This is acceptable because EXCEPTION_BACKTRACE is only used as a # fallback when backtrace_locations returns nil (the set_backtrace From c97b95220b09989c0c97681c6d90a170cb034e7b Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 10:32:09 -0400 Subject: [PATCH 14/16] Add doc explaining Exception backtrace internals and UnboundMethod behavior Documents why UnboundMethod bypasses backtrace_locations overrides but not backtrace overrides, traced through MRI's setup_exception (eval.c), rb_get_backtrace (error.c), exc_backtrace, and exc_backtrace_locations. Includes code examples for every combination of overrides and set_backtrace, a summary table, and implications for DI's fallback path. 
Co-Authored-By: Claude Opus 4.6 --- docs/ExceptionBacktraceInternals.md | 226 ++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 docs/ExceptionBacktraceInternals.md diff --git a/docs/ExceptionBacktraceInternals.md b/docs/ExceptionBacktraceInternals.md new file mode 100644 index 00000000000..1c0bc7ad076 --- /dev/null +++ b/docs/ExceptionBacktraceInternals.md @@ -0,0 +1,226 @@ +# Exception Backtrace Internals: Why UnboundMethod Bypasses `backtrace_locations` Overrides But Not `backtrace` Overrides + +This document explains a MRI implementation detail that affects how +`Datadog::DI::EXCEPTION_BACKTRACE_LOCATIONS` and `Datadog::DI::EXCEPTION_BACKTRACE` +work when exception subclasses override backtrace methods. + +## Background + +DI captures `Exception.instance_method(:backtrace_locations)` and +`Exception.instance_method(:backtrace)` as UnboundMethod constants at load time. +The intent is to call `bind(exception).call` to get the real backtrace without +dispatching through the exception's method table, bypassing subclass overrides. + +This works for `backtrace_locations` but **not** for `backtrace`. The reason is +not in how UnboundMethod dispatches — both correctly call the C function — but in +what MRI's `raise` stores on the exception object beforehand. 
+ +## How MRI Stores Backtraces During `raise` + +When Ruby executes `raise`, it calls `setup_exception` (eval.c), which decides +whether to store the VM's backtrace on the exception: + +```c +// eval.c — setup_exception (simplified) +VALUE bt = rb_get_backtrace(mesg); // ← checks for #backtrace override +if (NIL_P(bt)) { + VALUE at = rb_ec_backtrace_object(ec); + rb_ivar_set(mesg, idBt_locations, at); // store in @bt_locations + set_backtrace(mesg, at); // store in @bt +} +``` + +`rb_get_backtrace` (error.c) checks whether `#backtrace` — specifically +`#backtrace`, not `#backtrace_locations` — has been overridden: + +```c +// error.c — rb_get_backtrace (simplified) +if (rb_method_basic_definition_p(CLASS_OF(exc), id_backtrace)) { + info = exc_backtrace(exc); // no override → C function +} else { + info = rb_funcallv(exc, mid, 0, 0); // override → call it +} +``` + +The logic: if the exception already "has" a backtrace (the override returned +non-nil), don't overwrite it with the VM's backtrace. This means: + +- **Override `backtrace`** → `rb_get_backtrace` calls the override → gets non-nil + → `NIL_P(bt)` is false → real backtrace **never stored** in `@bt` or + `@bt_locations`. + +- **Override `backtrace_locations`** (but not `backtrace`) → `rb_get_backtrace` + finds no override of `backtrace` → calls `exc_backtrace` → `@bt` is nil + (initialized to nil by `exc_init`) → returns nil → `NIL_P(bt)` is true → real + backtrace **stored** in both `@bt` and `@bt_locations`. 
+ +## What the C Functions Read + +Both `exc_backtrace` and `exc_backtrace_locations` are simple ivar readers: + +```c +// error.c +static VALUE exc_backtrace(VALUE exc) { + VALUE obj = rb_attr_get(exc, id_bt); // reads @bt + if (rb_backtrace_p(obj)) + obj = rb_backtrace_to_str_ary(obj); // convert raw → strings + return obj; +} + +static VALUE exc_backtrace_locations(VALUE exc) { + VALUE obj = rb_attr_get(exc, id_bt_locations); // reads @bt_locations + if (!NIL_P(obj)) + obj = rb_backtrace_to_location_ary(obj); // convert raw → Locations + return obj; +} +``` + +When called via UnboundMethod, these C functions execute correctly — the dispatch +is not the problem. The problem is that the ivars they read may be empty because +`setup_exception` never populated them. + +## Demonstration + +### Override `backtrace_locations` only — UnboundMethod bypasses it + +```ruby +BT = Exception.instance_method(:backtrace) +BT_LOCS = Exception.instance_method(:backtrace_locations) + +class OverrideLocations < StandardError + def backtrace_locations; []; end +end + +e = begin; raise OverrideLocations, "test"; rescue => e; e; end +``` + +During `raise`: +1. `rb_get_backtrace` checks for `#backtrace` override → **none found** +2. Calls `exc_backtrace` → `@bt` is nil → returns nil +3. `NIL_P(bt)` is true → **stores real backtrace** in `@bt` and `@bt_locations` + +After `raise`: + +```ruby +e.backtrace_locations #=> [] (Ruby override) +BT_LOCS.bind(e).call.first #=> #<Thread::Backtrace::Location> (real backtrace from @bt_locations) +BT.bind(e).call.first #=> "example.rb:8:in '<main>'" (real backtrace from @bt) +``` + +Both UnboundMethod calls return real data because `setup_exception` stored it. + +### Override `backtrace` only — UnboundMethod returns nil + +```ruby +class OverrideBacktrace < StandardError + def backtrace; ["fake:0:in 'fake'"]; end +end + +e = begin; raise OverrideBacktrace, "test"; rescue => e; e; end +``` + +During `raise`: +1. `rb_get_backtrace` checks for `#backtrace` override → **found** +2. Calls the override → gets `["fake:0:in 'fake'"]` (non-nil) +3. `NIL_P(bt)` is false → **skips storing** → `@bt` stays nil, `@bt_locations` stays nil + +After `raise`: + +```ruby +e.backtrace #=> ["fake:0:in 'fake'"] (Ruby override) +BT.bind(e).call #=> nil (@bt was never populated) +BT_LOCS.bind(e).call #=> nil (@bt_locations was never populated) +``` + +Both UnboundMethod calls return nil. The real backtrace was never stored. + +### Override both — UnboundMethod returns nil for both + +```ruby +class OverrideBoth < StandardError + def backtrace; ["fake:0:in 'fake'"]; end + def backtrace_locations; []; end +end + +e = begin; raise OverrideBoth, "test"; rescue => e; e; end +``` + +During `raise`: +1. `rb_get_backtrace` checks for `#backtrace` override → **found** +2. Calls the override → non-nil → skips storing + +After `raise`: + +```ruby +e.backtrace #=> ["fake:0:in 'fake'"] (override) +e.backtrace_locations #=> [] (override) +BT.bind(e).call #=> nil (not stored) +BT_LOCS.bind(e).call #=> nil (not stored) +``` + +Same as overriding `backtrace` alone — `rb_get_backtrace` only checks +`#backtrace`, so adding a `#backtrace_locations` override changes nothing +about what `setup_exception` stores. + +### `set_backtrace` with strings — no override, UnboundMethod works for `backtrace` + +```ruby +e = StandardError.new("wrapped") +e.set_backtrace(["/app/foo.rb:10:in 'bar'"]) +``` + +No `raise` involved, so `setup_exception` never runs.
`set_backtrace` (error.c) +stores the string array directly in `@bt`: + +```c +// error.c — exc_set_backtrace (simplified) +btobj = rb_location_ary_to_backtrace(bt); +if (RTEST(btobj)) { + rb_ivar_set(exc, id_bt, btobj); // Location array + rb_ivar_set(exc, id_bt_locations, btobj); +} else { + rb_ivar_set(exc, id_bt, rb_check_backtrace(bt)); // string array → @bt only +} +``` + +String arrays go into `@bt` only (not `@bt_locations`): + +```ruby +BT.bind(e).call #=> ["/app/foo.rb:10:in 'bar'"] (reads @bt) +BT_LOCS.bind(e).call #=> nil (@bt_locations not set) +``` + +This is the case DI's fallback path handles: `backtrace_locations` returns nil +(triggering the fallback), then `backtrace` returns the string array (which we +parse with a regex). + +## Summary Table + +| Scenario | `@bt` | `@bt_locations` | UnboundMethod `backtrace` | UnboundMethod `backtrace_locations` | +|---|---|---|---|---| +| No override | real backtrace | real backtrace | real strings | real Locations | +| Override `backtrace_locations` only | real backtrace | real backtrace | real strings | real Locations | +| Override `backtrace` only | nil | nil | nil | nil | +| Override both | nil | nil | nil | nil | +| `set_backtrace` with strings | string array | nil | string array | nil | +| `set_backtrace` with Locations (Ruby 3.4+) | Location array | Location array | real strings | real Locations | + +## Implications for DI + +DI uses `EXCEPTION_BACKTRACE_LOCATIONS` as the primary path and +`EXCEPTION_BACKTRACE` as a fallback for when `backtrace_locations` returns nil +(the `set_backtrace` with strings case). + +The limitation — that overriding `#backtrace` prevents both UnboundMethod calls +from working — does not affect DI in practice: + +1. The primary path (`backtrace_locations` via UnboundMethod) handles the common + case: exceptions raised normally, possibly with `backtrace_locations` overridden. + +2. 
The fallback path (`backtrace` via UnboundMethod) handles the `set_backtrace` + with strings case, where no subclass override is involved. + +3. The gap is: a subclass that overrides `#backtrace` AND whose instance is + raised normally without `set_backtrace` ever being called. Both paths then + return nil and DI reports an empty stacktrace; this is unrecoverable without + calling customer code. The exception type and message are still reported. From 0d85d49a5e834484b0340f400cacbbe424f6b41f Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 10:34:19 -0400 Subject: [PATCH 15/16] Remove exception backtrace doc (moved to claude-projects) --- docs/ExceptionBacktraceInternals.md | 226 ---------------------------- 1 file changed, 226 deletions(-) delete mode 100644 docs/ExceptionBacktraceInternals.md diff --git a/docs/ExceptionBacktraceInternals.md b/docs/ExceptionBacktraceInternals.md deleted file mode 100644 index 1c0bc7ad076..00000000000 --- a/docs/ExceptionBacktraceInternals.md +++ /dev/null @@ -1,226 +0,0 @@ -# Exception Backtrace Internals: Why UnboundMethod Bypasses `backtrace_locations` Overrides But Not `backtrace` Overrides - -This document explains a MRI implementation detail that affects how -`Datadog::DI::EXCEPTION_BACKTRACE_LOCATIONS` and `Datadog::DI::EXCEPTION_BACKTRACE` -work when exception subclasses override backtrace methods. - -## Background - -DI captures `Exception.instance_method(:backtrace_locations)` and -`Exception.instance_method(:backtrace)` as UnboundMethod constants at load time. -The intent is to call `bind(exception).call` to get the real backtrace without -dispatching through the exception's method table, bypassing subclass overrides. - -This works for `backtrace_locations` but **not** for `backtrace`. The reason is -not in how UnboundMethod dispatches — both correctly call the C function — but in -what MRI's `raise` stores on the exception object beforehand.
- -## How MRI Stores Backtraces During `raise` - -When Ruby executes `raise`, it calls `setup_exception` (eval.c), which decides -whether to store the VM's backtrace on the exception: - -```c -// eval.c — setup_exception (simplified) -VALUE bt = rb_get_backtrace(mesg); // ← checks for #backtrace override -if (NIL_P(bt)) { - VALUE at = rb_ec_backtrace_object(ec); - rb_ivar_set(mesg, idBt_locations, at); // store in @bt_locations - set_backtrace(mesg, at); // store in @bt -} -``` - -`rb_get_backtrace` (error.c) checks whether `#backtrace` — specifically -`#backtrace`, not `#backtrace_locations` — has been overridden: - -```c -// error.c — rb_get_backtrace (simplified) -if (rb_method_basic_definition_p(CLASS_OF(exc), id_backtrace)) { - info = exc_backtrace(exc); // no override → C function -} else { - info = rb_funcallv(exc, mid, 0, 0); // override → call it -} -``` - -The logic: if the exception already "has" a backtrace (the override returned -non-nil), don't overwrite it with the VM's backtrace. This means: - -- **Override `backtrace`** → `rb_get_backtrace` calls the override → gets non-nil - → `NIL_P(bt)` is false → real backtrace **never stored** in `@bt` or - `@bt_locations`. - -- **Override `backtrace_locations`** (but not `backtrace`) → `rb_get_backtrace` - finds no override of `backtrace` → calls `exc_backtrace` → `@bt` is nil - (initialized to nil by `exc_init`) → returns nil → `NIL_P(bt)` is true → real - backtrace **stored** in both `@bt` and `@bt_locations`. 
- -## What the C Functions Read - -Both `exc_backtrace` and `exc_backtrace_locations` are simple ivar readers: - -```c -// error.c -static VALUE exc_backtrace(VALUE exc) { - VALUE obj = rb_attr_get(exc, id_bt); // reads @bt - if (rb_backtrace_p(obj)) - obj = rb_backtrace_to_str_ary(obj); // convert raw → strings - return obj; -} - -static VALUE exc_backtrace_locations(VALUE exc) { - VALUE obj = rb_attr_get(exc, id_bt_locations); // reads @bt_locations - if (!NIL_P(obj)) - obj = rb_backtrace_to_location_ary(obj); // convert raw → Locations - return obj; -} -``` - -When called via UnboundMethod, these C functions execute correctly — the dispatch -is not the problem. The problem is that the ivars they read may be empty because -`setup_exception` never populated them. - -## Demonstration - -### Override `backtrace_locations` only — UnboundMethod bypasses it - -```ruby -BT = Exception.instance_method(:backtrace) -BT_LOCS = Exception.instance_method(:backtrace_locations) - -class OverrideLocations < StandardError - def backtrace_locations; []; end -end - -e = begin; raise OverrideLocations, "test"; rescue => e; e; end -``` - -During `raise`: -1. `rb_get_backtrace` checks for `#backtrace` override → **none found** -2. Calls `exc_backtrace` → `@bt` is nil → returns nil -3. `NIL_P(bt)` is true → **stores real backtrace** in `@bt` and `@bt_locations` - -After `raise`: - -```ruby -e.backtrace_locations #=> [] (Ruby override) -BT_LOCS.bind(e).call.first #=> #<Thread::Backtrace::Location> (real backtrace from @bt_locations) -BT.bind(e).call.first #=> "example.rb:8:in '<main>'" (real backtrace from @bt) -``` - -Both UnboundMethod calls return real data because `setup_exception` stored it. - -### Override `backtrace` only — UnboundMethod returns nil - -```ruby -class OverrideBacktrace < StandardError - def backtrace; ["fake:0:in 'fake'"]; end -end - -e = begin; raise OverrideBacktrace, "test"; rescue => e; e; end -``` - -During `raise`: -1. `rb_get_backtrace` checks for `#backtrace` override → **found** -2. Calls the override → gets `["fake:0:in 'fake'"]` (non-nil) -3. `NIL_P(bt)` is false → **skips storing** → `@bt` stays nil, `@bt_locations` stays nil - -After `raise`: - -```ruby -e.backtrace #=> ["fake:0:in 'fake'"] (Ruby override) -BT.bind(e).call #=> nil (@bt was never populated) -BT_LOCS.bind(e).call #=> nil (@bt_locations was never populated) -``` - -Both UnboundMethod calls return nil. The real backtrace was never stored. - -### Override both — UnboundMethod returns nil for both - -```ruby -class OverrideBoth < StandardError - def backtrace; ["fake:0:in 'fake'"]; end - def backtrace_locations; []; end -end - -e = begin; raise OverrideBoth, "test"; rescue => e; e; end -``` - -During `raise`: -1. `rb_get_backtrace` checks for `#backtrace` override → **found** -2. Calls the override → non-nil → skips storing - -After `raise`: - -```ruby -e.backtrace #=> ["fake:0:in 'fake'"] (override) -e.backtrace_locations #=> [] (override) -BT.bind(e).call #=> nil (not stored) -BT_LOCS.bind(e).call #=> nil (not stored) -``` - -Same as overriding `backtrace` alone — `rb_get_backtrace` only checks -`#backtrace`, so adding a `#backtrace_locations` override changes nothing -about what `setup_exception` stores. - -### `set_backtrace` with strings — no override, UnboundMethod works for `backtrace` - -```ruby -e = StandardError.new("wrapped") -e.set_backtrace(["/app/foo.rb:10:in 'bar'"]) -``` - -No `raise` involved, so `setup_exception` never runs.
`set_backtrace` (error.c) -stores the string array directly in `@bt`: - -```c -// error.c — exc_set_backtrace (simplified) -btobj = rb_location_ary_to_backtrace(bt); -if (RTEST(btobj)) { - rb_ivar_set(exc, id_bt, btobj); // Location array - rb_ivar_set(exc, id_bt_locations, btobj); -} else { - rb_ivar_set(exc, id_bt, rb_check_backtrace(bt)); // string array → @bt only -} -``` - -String arrays go into `@bt` only (not `@bt_locations`): - -```ruby -BT.bind(e).call #=> ["/app/foo.rb:10:in 'bar'"] (reads @bt) -BT_LOCS.bind(e).call #=> nil (@bt_locations not set) -``` - -This is the case DI's fallback path handles: `backtrace_locations` returns nil -(triggering the fallback), then `backtrace` returns the string array (which we -parse with a regex). - -## Summary Table - -| Scenario | `@bt` | `@bt_locations` | UnboundMethod `backtrace` | UnboundMethod `backtrace_locations` | -|---|---|---|---|---| -| No override | real backtrace | real backtrace | real strings | real Locations | -| Override `backtrace_locations` only | real backtrace | real backtrace | real strings | real Locations | -| Override `backtrace` only | nil | nil | nil | nil | -| Override both | nil | nil | nil | nil | -| `set_backtrace` with strings | string array | nil | string array | nil | -| `set_backtrace` with Locations (Ruby 3.4+) | Location array | Location array | real strings | real Locations | - -## Implications for DI - -DI uses `EXCEPTION_BACKTRACE_LOCATIONS` as the primary path and -`EXCEPTION_BACKTRACE` as a fallback for when `backtrace_locations` returns nil -(the `set_backtrace` with strings case). - -The limitation — that overriding `#backtrace` prevents both UnboundMethod calls -from working — does not affect DI in practice: - -1. The primary path (`backtrace_locations` via UnboundMethod) handles the common - case: exceptions raised normally, possibly with `backtrace_locations` overridden. - -2. 
The fallback path (`backtrace` via UnboundMethod) handles the `set_backtrace` - with strings case, where no subclass override is involved. - -3. The gap is: a subclass that overrides `#backtrace` AND whose instance is - raised normally without `set_backtrace` ever being called. Both paths then - return nil and DI reports an empty stacktrace; this is unrecoverable without - calling customer code. The exception type and message are still reported. From 38a3537539c6e9e44a3f55578026fa6d6ead9961 Mon Sep 17 00:00:00 2001 From: ddsign Date: Tue, 31 Mar 2026 10:45:16 -0400 Subject: [PATCH 16/16] Document all backtrace override scenarios in code comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - EXCEPTION_BACKTRACE_LOCATIONS: note that overriding #backtrace (not just #backtrace_locations) causes @bt_locations to also be nil - EXCEPTION_BACKTRACE: enumerate all cases — set_backtrace works even with override, only gap is override + raise + no set_backtrace - serialize_throwable: note the unrecoverable case Co-Authored-By: Claude Opus 4.6 --- lib/datadog/di.rb | 20 ++++++++++++++++++-- lib/datadog/di/probe_notification_builder.rb | 5 +++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/datadog/di.rb b/lib/datadog/di.rb index 04b5a9bcf32..78409a75744 100644 --- a/lib/datadog/di.rb +++ b/lib/datadog/di.rb @@ -24,6 +24,13 @@ module DI # # The UnboundMethod bypasses subclass overrides: bind(exception).call # always dispatches to the original Exception implementation. + # + # Note: if the subclass overrides #backtrace (not #backtrace_locations), + # MRI's setup_exception skips storing the VM backtrace entirely — both + # @bt and @bt_locations stay nil. In that case this UnboundMethod also + # returns nil. See EXCEPTION_BACKTRACE comment and + # docs/ruby/exception-backtrace-internals.md in claude-projects for the + # full MRI analysis.
EXCEPTION_BACKTRACE_LOCATIONS = Exception.instance_method(:backtrace_locations) # Same UnboundMethod trick for Exception#backtrace (Array). @@ -56,8 +63,17 @@ module DI # for backtrace_locations reads it directly from @bt_locations. # # This limitation is acceptable because this constant is only used as - # a fallback for the set_backtrace-with-strings case, where no - # subclass override is involved. + # a fallback when backtrace_locations returns nil. In the common + # set_backtrace-with-strings case, no subclass override is involved + # and the fallback works. If a subclass does override #backtrace AND + # set_backtrace was called, set_backtrace writes to @bt via C + # regardless of overrides, so the fallback still works. + # + # The only unrecoverable case: a subclass overrides #backtrace, the + # exception is raised normally, and set_backtrace is never called. + # Both @bt and @bt_locations are nil — the real backtrace was never + # stored by raise. DI reports an empty stacktrace (type and message + # are still reported). EXCEPTION_BACKTRACE = Exception.instance_method(:backtrace) class << self diff --git a/lib/datadog/di/probe_notification_builder.rb b/lib/datadog/di/probe_notification_builder.rb index ffd121ea97b..39758153edf 100644 --- a/lib/datadog/di/probe_notification_builder.rb +++ b/lib/datadog/di/probe_notification_builder.rb @@ -201,6 +201,11 @@ def serialize_throwable(exception) # # Both accessors use the UnboundMethod trick to bypass subclass # overrides, consistent with the rest of this method. + # + # If a subclass overrides #backtrace, MRI's raise never stores + # the real backtrace — both paths return nil and stacktrace is []. + # This is unrecoverable without calling customer code. + # See DI::EXCEPTION_BACKTRACE comment for details. locations = DI::EXCEPTION_BACKTRACE_LOCATIONS.bind(exception).call stacktrace = if locations format_backtrace_locations(locations)