From deb9f0c0511623618d110dc415db3372272dfaa0 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Thu, 16 Oct 2025 16:06:59 -0400 Subject: [PATCH 1/4] Remove pii_safe Messages sent to our backend from the tracing libraries must be constant messages. --- lib/datadog/core/telemetry/logger.rb | 4 +-- lib/datadog/core/telemetry/logging.rb | 9 ++---- .../collectors/cpu_and_wall_time_worker.rb | 2 +- .../collectors/idle_sampling_helper.rb | 2 +- sig/datadog/core/telemetry/logger.rbs | 2 +- sig/datadog/core/telemetry/logging.rbs | 2 +- spec/datadog/core/telemetry/logger_spec.rb | 12 ++------ spec/datadog/core/telemetry/logging_spec.rb | 28 ------------------- 8 files changed, 11 insertions(+), 50 deletions(-) diff --git a/lib/datadog/core/telemetry/logger.rb b/lib/datadog/core/telemetry/logger.rb index b38d81a3ead..b89aedfadca 100644 --- a/lib/datadog/core/telemetry/logger.rb +++ b/lib/datadog/core/telemetry/logger.rb @@ -14,8 +14,8 @@ module Telemetry # read: lib/datadog/core/telemetry/logging.rb module Logger class << self - def report(exception, level: :error, description: nil, pii_safe: false) - instance&.report(exception, level: level, description: description, pii_safe: pii_safe) + def report(exception, level: :error, description: nil) + instance&.report(exception, level: level, description: description) end def error(description) diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index e35cf92dad0..e1019beca4a 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -45,16 +45,13 @@ def self.from(exception) end end - def report(exception, level: :error, description: nil, pii_safe: false) + def report(exception, level: :error, description: nil) # Anonymous exceptions to be logged as message = +"#{exception.class.name || exception.class.inspect}" # standard:disable Style/RedundantInterpolation - exception_message = pii_safe ? exception.message : nil - - if description || exception_message + if description message << ':' - message << " #{description}" if description - message << " (#{exception.message})" if exception_message + message << " #{description}" end event = Event::Log.new( diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index 130b036a8b3..aa88ec2de9c 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -82,7 +82,7 @@ def start(on_failure_proc: nil) "Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}" ) on_failure_proc&.call - Datadog::Core::Telemetry::Logger.report(e, description: "CpuAndWallTimeWorker thread error", pii_safe: true) + Datadog::Core::Telemetry::Logger.report(e, description: "CpuAndWallTimeWorker thread error") end @worker_thread.name = self.class.name # Repeated from above to make sure thread gets named asap @worker_thread.thread_variable_set(:fork_safe, true) diff --git a/lib/datadog/profiling/collectors/idle_sampling_helper.rb b/lib/datadog/profiling/collectors/idle_sampling_helper.rb index 8be5748fcef..6d33ca03e4c 100644 --- a/lib/datadog/profiling/collectors/idle_sampling_helper.rb +++ b/lib/datadog/profiling/collectors/idle_sampling_helper.rb @@ -41,7 +41,7 @@ def start "IdleSamplingHelper thread error. " \ "Cause: #{e.class.name} #{e.message} Location: #{Array(e.backtrace).first}" ) - Datadog::Core::Telemetry::Logger.report(e, description: "IdleSamplingHelper thread error", pii_safe: true) + Datadog::Core::Telemetry::Logger.report(e, description: "IdleSamplingHelper thread error") end @worker_thread.name = self.class.name # Repeated from above to make sure thread gets named asap @worker_thread.thread_variable_set(:fork_safe, true) diff --git a/sig/datadog/core/telemetry/logger.rbs b/sig/datadog/core/telemetry/logger.rbs index 214488d0d29..b32bf487c1c 100644 --- a/sig/datadog/core/telemetry/logger.rbs +++ b/sig/datadog/core/telemetry/logger.rbs @@ -2,7 +2,7 @@ module Datadog module Core module Telemetry module Logger - def self.report: (Exception exception, ?level: Symbol, ?description: String?, ?pii_safe: bool) -> void + def self.report: (Exception exception, ?level: Symbol, ?description: String?) -> void def self.error: (String description) -> void diff --git a/sig/datadog/core/telemetry/logging.rbs b/sig/datadog/core/telemetry/logging.rbs index d5cf10547e7..f64aa023f3c 100644 --- a/sig/datadog/core/telemetry/logging.rbs +++ b/sig/datadog/core/telemetry/logging.rbs @@ -12,7 +12,7 @@ module Datadog def self.from: (Exception exception) -> String? end - def report: (Exception exception, ?level: Symbol, ?description: String?, ?pii_safe: bool) -> void + def report: (Exception exception, ?level: Symbol, ?description: String?) -> void def error: (String description) -> void end diff --git a/spec/datadog/core/telemetry/logger_spec.rb b/spec/datadog/core/telemetry/logger_spec.rb index 92941fcd391..e67798f225e 100644 --- a/spec/datadog/core/telemetry/logger_spec.rb +++ b/spec/datadog/core/telemetry/logger_spec.rb @@ -15,7 +15,7 @@ it do expect(Datadog.logger).not_to receive(:warn) - expect(telemetry).to receive(:report).with(exception, level: :error, description: 'Oops...', pii_safe: false) + expect(telemetry).to receive(:report).with(exception, level: :error, description: 'Oops...') described_class.report(exception, level: :error, description: 'Oops...') end @@ -23,20 +23,12 @@ context 'when only given an exception' do it do expect(Datadog.logger).not_to receive(:warn) - expect(telemetry).to receive(:report).with(exception, level: :error, description: nil, pii_safe: false) + expect(telemetry).to receive(:report).with(exception, level: :error, description: nil) described_class.report(exception) end end - context 'when pii_safe is true' do - it do - expect(Datadog.logger).not_to receive(:warn) - expect(telemetry).to receive(:report).with(exception, level: :error, description: nil, pii_safe: true) - - described_class.report(exception, pii_safe: true) - end - end end context 'when there is no telemetry component configured' do diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index 36d50b96651..d7ad1ab0ea4 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -80,34 +80,6 @@ def log!(_event) end end - context 'when pii_safe is true' do - it 'sends a log event to via telemetry including the exception message' do - expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| - expect(event.message).to eq('StandardError: (Test exception message without pii)') - end - - component.report( - StandardError.new('Test exception message without pii'), - level: :error, - pii_safe: true, - ) - end - - context 'when a description is provided' do - it 'sends a log event to via telemetry including the description' do - expect(component).to receive(:log!).with(instance_of(Datadog::Core::Telemetry::Event::Log)) do |event| - expect(event.message).to eq('StandardError: Test description (Test exception message without pii)') - end - - component.report( - StandardError.new('Test exception message without pii'), - description: 'Test description', - level: :error, - pii_safe: true, - ) - end - end - end end describe '.error' do From f3e546b23b60894edd8378d2d6e05a90221ead49 Mon Sep 17 00:00:00 2001 From: Steven Bouwkamp Date: Thu, 16 Oct 2025 16:42:19 -0400 Subject: [PATCH 2/4] Fix linting errors --- spec/datadog/core/telemetry/logger_spec.rb | 1 - spec/datadog/core/telemetry/logging_spec.rb | 1 - 2 files changed, 2 deletions(-) diff --git a/spec/datadog/core/telemetry/logger_spec.rb b/spec/datadog/core/telemetry/logger_spec.rb index e67798f225e..d40b5fdaf58 100644 --- a/spec/datadog/core/telemetry/logger_spec.rb +++ b/spec/datadog/core/telemetry/logger_spec.rb @@ -28,7 +28,6 @@ described_class.report(exception) end end - end context 'when there is no telemetry component configured' do diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index d7ad1ab0ea4..f3a2547bbb3 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -79,7 +79,6 @@ def log!(_event) end end end - end describe '.error' do From 89842582dc1694c225d1a67eff02cc6442099f43 Mon Sep 17 00:00:00 2001 From: Loic Nageleisen Date: Fri, 17 Oct 2025 15:34:15 +0200 Subject: [PATCH 3/4] Test that exception message is *not* included --- spec/datadog/core/telemetry/logging_spec.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spec/datadog/core/telemetry/logging_spec.rb b/spec/datadog/core/telemetry/logging_spec.rb index f3a2547bbb3..363bc1b380f 100644 --- a/spec/datadog/core/telemetry/logging_spec.rb +++ b/spec/datadog/core/telemetry/logging_spec.rb @@ -26,6 +26,7 @@ def log!(_event) logs: [{message: 'RuntimeError', level: 'ERROR', count: 1, stack_trace: a_string_including("\n/spec/")}] ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('p@ssw0rd') end begin @@ -46,6 +47,7 @@ def log!(_event) logs: [{message: 'RuntimeError: Must not contain PII', level: 'ERROR', count: 1, stack_trace: a_string_including("\n/spec/")}] ) + expect(event.payload[:logs].map { |log| log[:message] }).not_to include('p@ssw0rd') end begin @@ -68,6 +70,7 @@ def log!(_event) logs: [{message: /# Date: Fri, 17 Oct 2025 13:59:25 -0400 Subject: [PATCH 4/4] Implement custom exception approach for errors from C code --- .../clock_id_from_pthread.c | 2 +- .../collectors_cpu_and_wall_time_worker.c | 6 ++-- .../collectors_discrete_dynamic_sampler.c | 2 +- .../collectors_gc_profiling_helper.c | 2 +- .../collectors_idle_sampling_helper.c | 2 +- .../collectors_stack.c | 4 +-- .../collectors_thread_context.c | 8 ++--- .../heap_recorder.c | 36 +++++++++---------- .../profiling.c | 11 ++++-- .../ruby_helpers.c | 8 +++-- .../ruby_helpers.h | 4 +++ .../setup_signal_handler.c | 2 +- .../stack_recorder.c | 6 ++-- lib/datadog/core/telemetry/logging.rb | 18 ++++++++-- lib/datadog/profiling.rb | 6 ++++ sig/datadog/core/telemetry/logging.rbs | 4 +++ 16 files changed, 79 insertions(+), 42 deletions(-) diff --git a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c index be033110f73..9ffc1860e1b 100644 --- a/ext/datadog_profiling_native_extension/clock_id_from_pthread.c +++ b/ext/datadog_profiling_native_extension/clock_id_from_pthread.c @@ -18,7 +18,7 @@ void self_test_clock_id(void) { rb_nativethread_id_t expected_pthread_id = pthread_self(); rb_nativethread_id_t actual_pthread_id = pthread_id_for(rb_thread_current()); - if (expected_pthread_id != actual_pthread_id) rb_raise(rb_eRuntimeError, "pthread_id_for() self-test failed"); + if (expected_pthread_id != actual_pthread_id) rb_raise(datadog_profiling_error_class, "pthread_id_for() self-test failed"); } // Safety: This function is assumed never to raise exceptions by callers diff --git a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 022c4588705..cd9e5aaebda 100644 --- a/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/datadog_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -289,7 +289,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { after_gc_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID || after_gvl_running_from_postponed_job_handle == POSTPONED_JOB_HANDLE_INVALID ) { - rb_raise(rb_eRuntimeError, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); + rb_raise(datadog_profiling_error_class, "Failed to register profiler postponed jobs (got POSTPONED_JOB_HANDLE_INVALID)"); } #else gc_finalize_deferred_workaround = objspace_ptr_for_gc_finalize_deferred_workaround(); @@ -473,7 +473,7 @@ static VALUE _native_sampling_loop(DDTRACE_UNUSED VALUE _self, VALUE instance) { if (old_state != NULL) { if (is_thread_alive(old_state->owner_thread)) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "Could not start CpuAndWallTimeWorker: There's already another instance of CpuAndWallTimeWorker active in a different thread" ); } else { @@ -1284,7 +1284,7 @@ static VALUE rescued_sample_allocation(DDTRACE_UNUSED VALUE unused) { static void delayed_error(cpu_and_wall_time_worker_state *state, const char *error) { // If we can't raise an immediate exception at the calling site, use the asynchronous flow through the main worker loop. - stop_state(state, rb_exc_new_cstr(rb_eRuntimeError, error)); + stop_state(state, rb_exc_new_cstr(datadog_profiling_error_class, error)); } static VALUE _native_delayed_error(DDTRACE_UNUSED VALUE self, VALUE instance, VALUE error_msg) { diff --git a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c index 931adf85cb5..dd089a8db19 100644 --- a/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c +++ b/ext/datadog_profiling_native_extension/collectors_discrete_dynamic_sampler.c @@ -369,7 +369,7 @@ static VALUE _native_new(VALUE klass) { long now_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); if (now_ns == 0) { - rb_raise(rb_eRuntimeError, "failed to get clock time"); + rb_raise(datadog_profiling_error_class, "failed to get clock time"); } discrete_dynamic_sampler_init(&state->sampler, "test sampler", now_ns); diff --git a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c index e9dd6c727d9..c9ab53ed64e 100644 --- a/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_gc_profiling_helper.c @@ -119,7 +119,7 @@ uint8_t gc_profiling_set_metadata(ddog_prof_Label *labels, int labels_length) { }; if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + rb_raise(datadog_profiling_error_class, "BUG: gc_profiling_set_metadata unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } return label_pos; diff --git a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c index fcbc42eaa94..23586d240fb 100644 --- a/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c +++ b/ext/datadog_profiling_native_extension/collectors_idle_sampling_helper.c @@ -153,7 +153,7 @@ static void *run_idle_sampling_loop(void *state_ptr) { // Process pending action if (next_action == ACTION_RUN) { if (run_action_function == NULL) { - grab_gvl_and_raise(rb_eRuntimeError, "Unexpected NULL run_action_function in run_idle_sampling_loop"); + grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected NULL run_action_function in run_idle_sampling_loop"); } run_action_function(); diff --git a/ext/datadog_profiling_native_extension/collectors_stack.c b/ext/datadog_profiling_native_extension/collectors_stack.c index bf60dbc78b8..cf79eaf00a6 100644 --- a/ext/datadog_profiling_native_extension/collectors_stack.c +++ b/ext/datadog_profiling_native_extension/collectors_stack.c @@ -284,11 +284,11 @@ void sample_thread( // here, but >= 0 makes this easier to understand/debug. bool only_wall_time = cpu_or_wall_sample && values.cpu_time_ns == 0 && values.wall_time_ns >= 0; - if (cpu_or_wall_sample && state_label == NULL) rb_raise(rb_eRuntimeError, "BUG: Unexpected missing state_label"); + if (cpu_or_wall_sample && state_label == NULL) rb_raise(datadog_profiling_error_class, "BUG: Unexpected missing state_label"); if (has_cpu_time) { state_label->str = DDOG_CHARSLICE_C("had cpu"); - if (labels.is_gvl_waiting_state) rb_raise(rb_eRuntimeError, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); + if (labels.is_gvl_waiting_state) rb_raise(datadog_profiling_error_class, "BUG: Unexpected combination of cpu-time with is_gvl_waiting"); } int top_of_stack_position = captured_frames - 1; diff --git a/ext/datadog_profiling_native_extension/collectors_thread_context.c b/ext/datadog_profiling_native_extension/collectors_thread_context.c index f2cc76c6021..c2c38bae845 100644 --- a/ext/datadog_profiling_native_extension/collectors_thread_context.c +++ b/ext/datadog_profiling_native_extension/collectors_thread_context.c @@ -831,7 +831,7 @@ VALUE thread_context_collector_sample_after_gc(VALUE self_instance) { TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); if (state->gc_tracking.wall_time_at_previous_gc_ns == INVALID_TIME) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected call to sample_after_gc without valid GC information available"); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected call to sample_after_gc without valid GC information available"); } int max_labels_needed_for_gc = 7; // Magic number gets validated inside gc_profiling_set_metadata @@ -998,7 +998,7 @@ static void trigger_sample_for_thread( // @ivoanjo: I wonder if C compilers are smart enough to statically prove this check never triggers unless someone // changes the code erroneously and remove it entirely? if (label_pos > max_label_count) { - rb_raise(rb_eRuntimeError, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected label_pos (%d) > max_label_count (%d)", label_pos, max_label_count); } ddog_prof_Slice_Label slice_labels = {.ptr = labels, .len = label_pos}; @@ -1295,7 +1295,7 @@ static long update_time_since_previous_sample(long *time_at_previous_sample_ns, elapsed_time_ns = 0; } else { // We don't expect non-wall time to go backwards, so let's flag this as a bug - rb_raise(rb_eRuntimeError, "BUG: Unexpected negative elapsed_time_ns between samples"); + rb_raise(datadog_profiling_error_class, "BUG: Unexpected negative elapsed_time_ns between samples"); } } @@ -1961,7 +1961,7 @@ static uint64_t otel_span_id_to_uint(VALUE otel_span_id) { thread_context_collector_state *state; TypedData_Get_Struct(self_instance, thread_context_collector_state, &thread_context_collector_typed_data, state); - if (!state->timeline_enabled) rb_raise(rb_eRuntimeError, "GVL profiling requires timeline to be enabled"); + if (!state->timeline_enabled) rb_raise(datadog_profiling_error_class, "GVL profiling requires timeline to be enabled"); intptr_t gvl_waiting_at = gvl_profiling_state_thread_object_get(current_thread); diff --git a/ext/datadog_profiling_native_extension/heap_recorder.c b/ext/datadog_profiling_native_extension/heap_recorder.c index f869d24c6d3..3a6eaee7ccf 100644 --- a/ext/datadog_profiling_native_extension/heap_recorder.c +++ b/ext/datadog_profiling_native_extension/heap_recorder.c @@ -300,7 +300,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj } if (heap_recorder->active_recording != NULL) { - rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end."); + rb_raise(datadog_profiling_error_class, "Detected consecutive heap allocation recording starts without end."); } if (++heap_recorder->num_recordings_skipped < heap_recorder->sample_rate || @@ -323,7 +323,7 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj VALUE ruby_obj_id = rb_obj_id(new_obj); if (!FIXNUM_P(ruby_obj_id)) { - rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); + rb_raise(datadog_profiling_error_class, "Detected a bignum object id. These are not supported by heap profiling."); } heap_recorder->active_recording = object_record_new( @@ -371,7 +371,7 @@ static VALUE end_heap_allocation_recording(VALUE protect_args) { if (active_recording == NULL) { // Recording ended without having been started? - rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); + rb_raise(datadog_profiling_error_class, "Ended a heap recording that was not started"); } // From now on, mark the global active recording as invalid so we can short-circuit at any point // and not end up with a still active recording. the local active_recording still holds the @@ -487,14 +487,14 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot != NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished."); + rb_raise(datadog_profiling_error_class, "New heap recorder iteration prepared without the previous one having been finished."); } heap_recorder_update(heap_recorder, /* full_update: */ true); heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records); if (heap_recorder->object_records_snapshot == NULL) { - rb_raise(rb_eRuntimeError, "Failed to create heap snapshot."); + rb_raise(datadog_profiling_error_class, "Failed to create heap snapshot."); } } @@ -505,7 +505,7 @@ void heap_recorder_finish_iteration(heap_recorder *heap_recorder) { if (heap_recorder->object_records_snapshot == NULL) { // we could trivially handle this but we raise to highlight and catch unexpected usages. - rb_raise(rb_eRuntimeError, "Heap recorder iteration finished without having been prepared."); + rb_raise(datadog_profiling_error_class, "Heap recorder iteration finished without having been prepared."); } st_free_table(heap_recorder->object_records_snapshot); @@ -733,7 +733,7 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec // needed to fully build the object_record. active_recording->heap_record = heap_record; if (heap_record->num_tracked_objects == UINT32_MAX) { - rb_raise(rb_eRuntimeError, "Reached maximum number of tracked objects for heap record"); + rb_raise(datadog_profiling_error_class, "Reached maximum number of tracked objects for heap record"); } heap_record->num_tracked_objects++; @@ -741,11 +741,11 @@ static void commit_recording(heap_recorder *heap_recorder, heap_record *heap_rec if (existing_error) { object_record *existing_record = NULL; st_lookup(heap_recorder->object_records, active_recording->obj_id, (st_data_t *) &existing_record); - if (existing_record == NULL) rb_raise(rb_eRuntimeError, "Unexpected NULL when reading existing record"); + if (existing_record == NULL) rb_raise(datadog_profiling_error_class, "Unexpected NULL when reading existing record"); VALUE existing_inspect = object_record_inspect(heap_recorder, existing_record); VALUE new_inspect = object_record_inspect(heap_recorder, active_recording); - rb_raise(rb_eRuntimeError, "Object ids are supposed to be unique. We got 2 allocation recordings with " + rb_raise(datadog_profiling_error_class, "Object ids are supposed to be unique. We got 2 allocation recordings with " "the same id. previous={%"PRIsVALUE"} new={%"PRIsVALUE"}", existing_inspect, new_inspect); } } @@ -781,7 +781,7 @@ static void cleanup_heap_record_if_unused(heap_recorder *heap_recorder, heap_rec } if (!st_delete(heap_recorder->heap_records, (st_data_t*) &heap_record, NULL)) { - rb_raise(rb_eRuntimeError, "Attempted to cleanup an untracked heap_record"); + rb_raise(datadog_profiling_error_class, "Attempted to cleanup an untracked heap_record"); }; heap_record_free(heap_recorder, heap_record); } @@ -791,14 +791,14 @@ static void on_committed_object_record_cleanup(heap_recorder *heap_recorder, obj // (See PROF-10656 Datadog-internal for details). Just in case, I've sprinkled a bunch of NULL tests in this function for now. // Once we figure out the issue we can get rid of them again. - if (heap_recorder == NULL) rb_raise(rb_eRuntimeError, "heap_recorder was NULL in on_committed_object_record_cleanup"); - if (heap_recorder->heap_records == NULL) rb_raise(rb_eRuntimeError, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); - if (record == NULL) rb_raise(rb_eRuntimeError, "record was NULL in on_committed_object_record_cleanup"); + if (heap_recorder == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder was NULL in on_committed_object_record_cleanup"); + if (heap_recorder->heap_records == NULL) rb_raise(datadog_profiling_error_class, "heap_recorder->heap_records was NULL in on_committed_object_record_cleanup"); + if (record == NULL) rb_raise(datadog_profiling_error_class, "record was NULL in on_committed_object_record_cleanup"); // Starting with the associated heap record. There will now be one less tracked object pointing to it heap_record *heap_record = record->heap_record; - if (heap_record == NULL) rb_raise(rb_eRuntimeError, "heap_record was NULL in on_committed_object_record_cleanup"); + if (heap_record == NULL) rb_raise(datadog_profiling_error_class, "heap_record was NULL in on_committed_object_record_cleanup"); heap_record->num_tracked_objects--; @@ -862,7 +862,7 @@ heap_record* heap_record_new(heap_recorder *recorder, ddog_prof_Slice_Location l uint16_t frames_len = locations.len; if (frames_len > MAX_FRAMES_LIMIT) { // This is not expected as MAX_FRAMES_LIMIT is shared with the stacktrace construction mechanism - rb_raise(rb_eRuntimeError, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); + rb_raise(datadog_profiling_error_class, "Found stack with more than %d frames (%d)", MAX_FRAMES_LIMIT, frames_len); } heap_record *stack = calloc(1, sizeof(heap_record) + frames_len * sizeof(heap_frame)); // See "note on calloc vs ruby_xcalloc use" above stack->num_tracked_objects = 0; @@ -933,21 +933,21 @@ static void unintern_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern(recorder->string_storage, id); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_error_class, "Failed to unintern id: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static void unintern_all_or_raise(heap_recorder *recorder, ddog_prof_Slice_ManagedStringId ids) { ddog_prof_MaybeError result = ddog_prof_ManagedStringStorage_unintern_all(recorder->string_storage, ids); if (result.tag == DDOG_PROF_OPTION_ERROR_SOME_ERROR) { - rb_raise(rb_eRuntimeError, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); + rb_raise(datadog_profiling_error_class, "Failed to unintern_all: %"PRIsVALUE, get_error_details_and_drop(&result.some)); } } static VALUE get_ruby_string_or_raise(heap_recorder *recorder, ddog_prof_ManagedStringId id) { ddog_StringWrapperResult get_string_result = ddog_prof_ManagedStringStorage_get_string(recorder->string_storage, id); if (get_string_result.tag == DDOG_STRING_WRAPPER_RESULT_ERR) { - rb_raise(rb_eRuntimeError, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); + rb_raise(datadog_profiling_error_class, "Failed to get string: %"PRIsVALUE, get_error_details_and_drop(&get_string_result.err)); } VALUE ruby_string = ruby_string_from_vec_u8(get_string_result.ok.message); ddog_StringWrapper_drop((ddog_StringWrapper *) &get_string_result.ok); diff --git a/ext/datadog_profiling_native_extension/profiling.c b/ext/datadog_profiling_native_extension/profiling.c index bcf97709c14..2fcc91e3e41 100644 --- a/ext/datadog_profiling_native_extension/profiling.c +++ b/ext/datadog_profiling_native_extension/profiling.c @@ -55,6 +55,11 @@ void DDTRACE_EXPORT Init_datadog_profiling_native_extension(void) { rb_define_singleton_method(native_extension_module, "native_working?", native_working_p, 0); rb_funcall(native_extension_module, rb_intern("private_class_method"), 1, ID2SYM(rb_intern("native_working?"))); + // Initialize the ProfilingError exception class reference + // This exception class should be defined in Ruby code (lib/datadog/profiling.rb) + datadog_profiling_error_class = rb_const_get(profiling_module, rb_intern("ProfilingError")); + rb_global_variable(&datadog_profiling_error_class); + ruby_helpers_init(); collectors_cpu_and_wall_time_worker_init(profiling_module); collectors_discrete_dynamic_sampler_init(profiling_module); @@ -115,7 +120,7 @@ static VALUE _native_grab_gvl_and_raise(DDTRACE_UNUSED VALUE _self, VALUE except grab_gvl_and_raise(args.exception_class, "%s", args.test_message); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); + rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise; this should never happen"); } static void *trigger_grab_gvl_and_raise(void *trigger_args) { @@ -151,7 +156,7 @@ static VALUE _native_grab_gvl_and_raise_syserr(DDTRACE_UNUSED VALUE _self, VALUE grab_gvl_and_raise_syserr(args.syserr_errno, "%s", args.test_message); } - rb_raise(rb_eRuntimeError, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); + rb_raise(datadog_profiling_error_class, "Failed to raise exception in _native_grab_gvl_and_raise_syserr; this should never happen"); } static void *trigger_grab_gvl_and_raise_syserr(void *trigger_args) { @@ -246,7 +251,7 @@ static VALUE _native_trigger_holding_the_gvl_signal_handler_on(DDTRACE_UNUSED VA replace_sigprof_signal_handler_with_empty_handler(holding_the_gvl_signal_handler); - if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(rb_eRuntimeError, "Could not signal background_thread"); + if (holding_the_gvl_signal_handler_result[0] == Qfalse) rb_raise(datadog_profiling_error_class, "Could not signal background_thread"); VALUE result = rb_hash_new(); rb_hash_aset(result, ID2SYM(rb_intern("ruby_thread_has_gvl_p")), holding_the_gvl_signal_handler_result[1]); diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.c b/ext/datadog_profiling_native_extension/ruby_helpers.c index 61b9db1af4f..71a79fb8240 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.c +++ b/ext/datadog_profiling_native_extension/ruby_helpers.c @@ -12,6 +12,10 @@ static ID _id2ref_id = Qnil; static ID inspect_id = Qnil; static ID to_s_id = Qnil; +// Global reference to Datadog::Profiling::ProfilingError exception class +// Initialized in profiling.c during extension initialization +VALUE datadog_profiling_error_class = Qnil; + void ruby_helpers_init(void) { rb_global_variable(&module_object_space); @@ -44,7 +48,7 @@ void grab_gvl_and_raise(VALUE exception_class, const char *format_string, ...) { if (is_current_thread_holding_the_gvl()) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "grab_gvl_and_raise called by thread holding the global VM lock. exception_message: '%s'", args.exception_message ); @@ -76,7 +80,7 @@ void grab_gvl_and_raise_syserr(int syserr_errno, const char *format_string, ...) if (is_current_thread_holding_the_gvl()) { rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "grab_gvl_and_raise_syserr called by thread holding the global VM lock. syserr_errno: %d, exception_message: '%s'", syserr_errno, args.exception_message diff --git a/ext/datadog_profiling_native_extension/ruby_helpers.h b/ext/datadog_profiling_native_extension/ruby_helpers.h index 5c135a21d14..b59b14a1af3 100644 --- a/ext/datadog_profiling_native_extension/ruby_helpers.h +++ b/ext/datadog_profiling_native_extension/ruby_helpers.h @@ -3,6 +3,10 @@ #include #include "datadog_ruby_common.h" +// Global reference to Datadog::Profiling::ProfilingError exception class +// This is initialized in profiling.c during extension initialization +extern VALUE datadog_profiling_error_class; + // Initialize internal data needed by some ruby helpers. Should be called during start, before any actual // usage of ruby helpers. void ruby_helpers_init(void); diff --git a/ext/datadog_profiling_native_extension/setup_signal_handler.c b/ext/datadog_profiling_native_extension/setup_signal_handler.c index fa4bb27e95c..bab7897710d 100644 --- a/ext/datadog_profiling_native_extension/setup_signal_handler.c +++ b/ext/datadog_profiling_native_extension/setup_signal_handler.c @@ -72,7 +72,7 @@ static void install_sigprof_signal_handler_internal( } rb_raise( - rb_eRuntimeError, + datadog_profiling_error_class, "Could not install profiling signal handler (%s): There's a pre-existing SIGPROF signal handler", handler_pretty_name ); diff --git a/ext/datadog_profiling_native_extension/stack_recorder.c b/ext/datadog_profiling_native_extension/stack_recorder.c index bc3b3439500..05bffa5bee4 100644 --- a/ext/datadog_profiling_native_extension/stack_recorder.c +++ b/ext/datadog_profiling_native_extension/stack_recorder.c @@ -770,10 +770,10 @@ static void build_heap_profile_without_gvl(stack_recorder_state *state, profile_ // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the // same locks but acquiring them in different order). if (!iterated) { - grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: iteration cancelled"); + grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: iteration cancelled"); } else if (iteration_context.error) { - grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); + grab_gvl_and_raise(datadog_profiling_error_class, "Failure during heap profile building: %s", iteration_context.error_msg); } } @@ -835,7 +835,7 @@ static profile_slot* serializer_flip_active_and_inactive_slots(stack_recorder_st int previously_active_slot = state->active_slot; if (previously_active_slot != 1 && previously_active_slot != 2) { - grab_gvl_and_raise(rb_eRuntimeError, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); + grab_gvl_and_raise(datadog_profiling_error_class, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); } pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two; diff --git a/lib/datadog/core/telemetry/logging.rb b/lib/datadog/core/telemetry/logging.rb index e1019beca4a..da3fa3d1d53 100644 --- a/lib/datadog/core/telemetry/logging.rb +++ b/lib/datadog/core/telemetry/logging.rb @@ -49,9 +49,13 @@ def report(exception, level: :error, description: nil) # Anonymous exceptions to be logged as message = +"#{exception.class.name || exception.class.inspect}" # standard:disable Style/RedundantInterpolation - if description + # Only include exception message if it's a ProfilingError (which we know contains safe messages) + exception_message = safe_exception_message(exception) + + if description || exception_message message << ':' - message << " #{description}" + message << " #{description}" if description + message << " (#{exception_message})" if exception_message end event = Event::Log.new( @@ -63,6 +67,16 @@ def report(exception, level: :error, description: nil) log!(event) end + private + + def safe_exception_message(exception) + # Only include exception messages from ProfilingError, as those are guaranteed to be created by us + # with constant, PII-safe messages + if defined?(Datadog::Profiling::ProfilingError) && exception.is_a?(Datadog::Profiling::ProfilingError) + exception.message + end + end + def error(description) event = Event::Log.new(message: description, level: :error) diff --git a/lib/datadog/profiling.rb b/lib/datadog/profiling.rb index f867e3f9793..f644cc32b7a 100644 --- a/lib/datadog/profiling.rb +++ b/lib/datadog/profiling.rb @@ -7,6 +7,12 @@ module Datadog # Datadog Continuous Profiler implementation: https://docs.datadoghq.com/profiler/ module Profiling + # Custom exception class for profiler errors. + # This exception class is used by the profiler's C code to signal errors. + # Telemetry will only include exception messages for instances of this class, + # ensuring that only known-safe messages (created by Datadog code) are reported. + class ProfilingError < StandardError; end + def self.supported? unsupported_reason.nil? end diff --git a/sig/datadog/core/telemetry/logging.rbs b/sig/datadog/core/telemetry/logging.rbs index f64aa023f3c..a828b564b27 100644 --- a/sig/datadog/core/telemetry/logging.rbs +++ b/sig/datadog/core/telemetry/logging.rbs @@ -15,6 +15,10 @@ module Datadog def report: (Exception exception, ?level: Symbol, ?description: String?) -> void def error: (String description) -> void + + private + + def safe_exception_message: (Exception exception) -> String? end end end