Skip to content

Commit

Permalink
DEBUG-2334 remaining dynamic instrumentation code (#4098)
Browse files Browse the repository at this point in the history
- Adds DI Remote and integrates DI into remote config processing
- Integration test starting from remote config parsing, through instrumentation to snapshot sending to backend
- DI is now enabled by default
- DI code tracking is now enabled by default (script_compiled trace point) if DD_DYNAMIC_INSTRUMENTATION_ENABLED is set in environment
  • Loading branch information
p-datadog authored Nov 25, 2024
1 parent 5b73da4 commit 23a33f9
Show file tree
Hide file tree
Showing 23 changed files with 1,042 additions and 92 deletions.
3 changes: 3 additions & 0 deletions lib/datadog.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,7 @@
# Load other products (must follow tracing)
require_relative 'datadog/profiling'
require_relative 'datadog/appsec'
# Line probes will not work on Ruby < 2.6 because of lack of :script_compiled
# trace point. Only load DI on supported Ruby versions.
require_relative 'datadog/di' if RUBY_VERSION >= '2.6'
require_relative 'datadog/kit'
10 changes: 9 additions & 1 deletion lib/datadog/core/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,16 @@ def build_components(settings)
def replace_components!(settings, old)
components = Components.new(settings)

# Carry over state from existing components to the new ones.
# Currently, if we already started the remote component (which
# happens after a request goes through installed Rack middleware),
# we will start the new remote component as well.
old_state = {
remote_started: old.remote&.started?,
}

old.shutdown!(components)
components.startup!(settings)
components.startup!(settings, old_state: old_state)
components
end

Expand Down
18 changes: 17 additions & 1 deletion lib/datadog/core/configuration/components.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
require_relative '../../tracing/component'
require_relative '../../profiling/component'
require_relative '../../appsec/component'
require_relative '../../di/component'
require_relative '../crashtracking/component'

module Datadog
Expand Down Expand Up @@ -83,6 +84,7 @@ def build_crashtracker(settings, agent_settings, logger:)
:telemetry,
:tracer,
:crashtracker,
:dynamic_instrumentation,
:appsec

def initialize(settings)
Expand Down Expand Up @@ -110,12 +112,13 @@ def initialize(settings)
@runtime_metrics = self.class.build_runtime_metrics_worker(settings)
@health_metrics = self.class.build_health_metrics(settings)
@appsec = Datadog::AppSec::Component.build_appsec_component(settings, telemetry: telemetry)
@dynamic_instrumentation = Datadog::DI::Component.build(settings, agent_settings, telemetry: telemetry)

self.class.configure_tracing(settings)
end

# Starts up components
def startup!(settings)
def startup!(settings, old_state: nil)
if settings.profiling.enabled
if profiler
profiler.start
Expand All @@ -126,6 +129,16 @@ def startup!(settings)
end
end

if settings.remote.enabled && old_state&.[](:remote_started)
# The library was reconfigured and previously it already started
# the remote component (i.e., it received at least one request
# through the installed Rack middleware which started the remote).
# If the new configuration also has remote enabled, start the
# new remote right away.
# remote should always be not nil here but steep doesn't know this.
remote&.start
end

Core::Diagnostics::EnvironmentLogger.collect_and_log!(@environment_logger_extra)
end

Expand All @@ -136,6 +149,9 @@ def shutdown!(replacement = nil)
# Shutdown remote configuration
remote.shutdown! if remote

# Shutdown DI after remote, since remote config triggers DI operations.
dynamic_instrumentation&.shutdown!

# Decommission AppSec
appsec.shutdown! if appsec

Expand Down
124 changes: 65 additions & 59 deletions lib/datadog/core/remote/client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,93 +24,99 @@ def initialize(transport, capabilities, repository: Configuration::Repository.ne
@dispatcher = Dispatcher.new(@capabilities.receivers)
end

# rubocop:disable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity
def sync
# TODO: Skip sync if no capabilities are registered
response = transport.send_config(payload)

if response.ok?
# when response is completely empty, do nothing as in: leave as is
if response.empty?
Datadog.logger.debug { 'remote: empty response => NOOP' }
process_response(response)
elsif response.internal_error?
raise TransportError, response.to_s
end
end

return
end
private

begin
paths = response.client_configs.map do |path|
Configuration::Path.parse(path)
end
def process_response(response)
# when response is completely empty, do nothing as in: leave as is
if response.empty?
Datadog.logger.debug { 'remote: empty response => NOOP' }

targets = Configuration::TargetMap.parse(response.targets)
return
end

contents = Configuration::ContentList.parse(response.target_files)
rescue Remote::Configuration::Path::ParseError => e
raise SyncError, e.message
begin
paths = response.client_configs.map do |path|
Configuration::Path.parse(path)
end

# To make sure steep does not complain
return unless paths && targets && contents
targets = Configuration::TargetMap.parse(response.targets)

contents = Configuration::ContentList.parse(response.target_files)
rescue Remote::Configuration::Path::ParseError => e
raise SyncError, e.message
end

# TODO: sometimes it can strangely be so that paths.empty?
# TODO: sometimes it can strangely be so that targets.empty?
# To make sure steep does not complain
return unless paths && targets && contents

changes = repository.transaction do |current, transaction|
# paths to be removed: previously applied paths minus ingress paths
(current.paths - paths).each { |p| transaction.delete(p) }
# TODO: sometimes it can strangely be so that paths.empty?
# TODO: sometimes it can strangely be so that targets.empty?

# go through each ingress path
paths.each do |path|
# match target with path
target = targets[path]
apply_config(paths, targets, contents)
end

# abort entirely if matching target not found
raise SyncError, "no target for path '#{path}'" if target.nil?
def apply_config(paths, targets, contents)
changes = repository.transaction do |current, transaction|
# paths to be removed: previously applied paths minus ingress paths
(current.paths - paths).each { |p| transaction.delete(p) }

# new paths are not in previously applied paths
new = !current.paths.include?(path)
# go through each ingress path
paths.each do |path|
# match target with path
target = targets[path]

# updated paths are in previously applied paths
# but the content hash changed
changed = current.paths.include?(path) && !current.contents.find_content(path, target)
# abort entirely if matching target not found
raise SyncError, "no target for path '#{path}'" if target.nil?

# skip if unchanged
same = !new && !changed
# new paths are not in previously applied paths
new = !current.paths.include?(path)

next if same
# updated paths are in previously applied paths
# but the content hash changed
changed = current.paths.include?(path) && !current.contents.find_content(path, target)

# match content with path and target
content = contents.find_content(path, target)
# skip if unchanged
same = !new && !changed

# abort entirely if matching content not found
raise SyncError, "no valid content for target at path '#{path}'" if content.nil?
next if same

# to be added or updated << config
# TODO: metadata (hash, version, etc...)
transaction.insert(path, target, content) if new
transaction.update(path, target, content) if changed
end
# match content with path and target
content = contents.find_content(path, target)

# save backend opaque backend state
transaction.set(opaque_backend_state: targets.opaque_backend_state)
transaction.set(targets_version: targets.version)
# abort entirely if matching content not found
raise SyncError, "no valid content for target at path '#{path}'" if content.nil?

# upon transaction end, new list of applied config + metadata (add, change, remove) will be saved
# TODO: also remove stale config (matching removed) from cache (client configs is exhaustive list of paths)
# to be added or updated << config
# TODO: metadata (hash, version, etc...)
transaction.insert(path, target, content) if new
transaction.update(path, target, content) if changed
end

if changes.empty?
Datadog.logger.debug { 'remote: no changes' }
else
dispatcher.dispatch(changes, repository)
end
elsif response.internal_error?
raise TransportError, response.to_s
# save backend opaque backend state
transaction.set(opaque_backend_state: targets.opaque_backend_state)
transaction.set(targets_version: targets.version)

# upon transaction end, new list of applied config + metadata (add, change, remove) will be saved
# TODO: also remove stale config (matching removed) from cache (client configs is exhaustive list of paths)
end
end
# rubocop:enable Metrics/AbcSize,Metrics/PerceivedComplexity,Metrics/MethodLength,Metrics/CyclomaticComplexity

private
if changes.empty?
Datadog.logger.debug { 'remote: no changes' }
else
dispatcher.dispatch(changes, repository)
end
end

def payload # rubocop:disable Metrics/MethodLength
state = repository.state
Expand Down
6 changes: 6 additions & 0 deletions lib/datadog/core/remote/client/capabilities.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def register(settings)
register_receivers(Datadog::AppSec::Remote.receivers(@telemetry))
end

if settings.respond_to?(:dynamic_instrumentation) && settings.dynamic_instrumentation.enabled
register_capabilities(Datadog::DI::Remote.capabilities)
register_products(Datadog::DI::Remote.products)
register_receivers(Datadog::DI::Remote.receivers(@telemetry))
end

register_capabilities(Datadog::Tracing::Remote.capabilities)
register_products(Datadog::Tracing::Remote.products)
register_receivers(Datadog::Tracing::Remote.receivers(@telemetry))
Expand Down
104 changes: 84 additions & 20 deletions lib/datadog/di.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
require_relative 'di/probe_notification_builder'
require_relative 'di/probe_notifier_worker'
require_relative 'di/redactor'
require_relative 'di/remote'
require_relative 'di/serializer'
require_relative 'di/transport'
require_relative 'di/utils'
Expand All @@ -25,6 +26,9 @@
# and AR should be loaded before any application code is loaded, being
# part of Rails, therefore for now we should be OK to just require the
# AR integration from here.
#
# TODO this require might need to be delayed via Rails post-initialization
# logic?
require_relative 'di/contrib/active_record'
end

Expand All @@ -42,6 +46,8 @@ def enabled?
# Expose DI to global shared objects
Extensions.activate!

LOCK = Mutex.new

class << self
attr_reader :code_tracker

Expand All @@ -58,6 +64,32 @@ def activate_tracking!
(@code_tracker ||= CodeTracker.new).start
end

# Activates code tracking if possible.
#
# This method does nothing if invoked in an environment that does not
# implement required trace points for code tracking (MRI Ruby < 2.6,
# JRuby) and rescues any exceptions that may be raised by downstream
# DI code.
def activate_tracking
# :script_compiled trace point was added in Ruby 2.6.
return unless RUBY_VERSION >= '2.6'

begin
# Activate code tracking by default because line trace points will not work
# without it.
Datadog::DI.activate_tracking!
rescue => exc
if defined?(Datadog.logger)
Datadog.logger.warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}")
else
# We do not have Datadog logger potentially because DI code tracker is
# being loaded early in application boot process and the rest of datadog
# wasn't loaded yet. Output to standard error.
warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}")
end
end
end

# Deactivates code tracking. In normal usage of DI this method should
# never be called, however it is used by DI's test suite to reset
# state for individual tests.
Expand All @@ -76,30 +108,62 @@ def code_tracking_active?
code_tracker&.active? || false
end

# This method is called from DI Remote handler to issue DI operations
# to the probe manager (add or remove probes).
#
# When DI Remote is executing, Datadog.components should be initialized
# and we should be able to reference it to get to the DI component.
#
# Given that we need the current_component anyway for code tracker,
# perhaps we should delete the +component+ method and just use
# +current_component+ in all cases.
def component
# TODO uncomment when remote is merged
#Datadog.send(:components).dynamic_instrumentation
Datadog.send(:components).dynamic_instrumentation
end

# DI code tracker is instantiated globally before the regular set of
# components is created, but the code tracker needs to call out to the
# "current" DI component to perform instrumentation when application
# code is loaded. Because this call may happen prior to Datadog
# components having been initialized, we maintain the "current component"
# which contains a reference to the most recently instantiated
# DI::Component. This way, if a DI component hasn't been instantiated,
# we do not try to reference Datadog.components.
def current_component
LOCK.synchronize do
@current_components&.last
end
end

# To avoid potential races with DI::Component being added and removed,
# we maintain a list of the components. Normally the list should contain
# either zero or one component depending on whether DI is enabled in
# Datadog configuration. However, if a new instance of DI::Component
# is created while the previous instance is still running, we are
# guaranteed to not end up with no component when one is running.
def add_current_component(component)
LOCK.synchronize do
@current_components ||= []
@current_components << component
end
end

def remove_current_component(component)
LOCK.synchronize do
@current_components&.delete(component)
end
end
end
end
end

=begin not yet enabled
# :script_compiled trace point was added in Ruby 2.6.
if RUBY_VERSION >= '2.6'
begin
# Activate code tracking by default because line trace points will not work
# without it.
Datadog::DI.activate_tracking!
rescue => exc
if defined?(Datadog.logger)
Datadog.logger.warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}")
else
# We do not have Datadog logger potentially because DI code tracker is
# being loaded early in application boot process and the rest of datadog
# wasn't loaded yet. Output to standard error.
warn("Failed to activate code tracking for DI: #{exc.class}: #{exc}")
end
end
if %w(1 true).include?(ENV['DD_DYNAMIC_INSTRUMENTATION_ENABLED']) # steep:ignore
# For initial release of Dynamic Instrumentation, activate code tracking
# only if DI is explicitly requested in the environment.
# Code tracking is required for line probes to work; see the comments
# above for the implementation of the method.
#
# If DI is enabled programmatically, the application can (and must,
# for line probes to work) activate tracking in an initializer.
Datadog::DI.activate_tracking
end
=end
Loading

0 comments on commit 23a33f9

Please sign in to comment.