From 9c7c87050685cee4c30057fcaa938cb9e377bdb1 Mon Sep 17 00:00:00 2001 From: Justin Littman Date: Wed, 15 Jan 2025 12:41:33 -0500 Subject: [PATCH] Adds tracing id to troubleshoot indexing. refs #5231 --- app/jobs/reindex_job.rb | 9 +++-- app/services/indexer.rb | 17 +++++++-- .../indexing/builders/document_builder.rb | 8 ++-- .../indexing/indexers/basic_indexer.rb | 6 ++- bin/rolling_index | 3 +- spec/jobs/reindex_job_spec.rb | 7 ++-- spec/services/indexer_spec.rb | 37 ++++++++++++++++--- .../builders/document_builder_spec.rb | 10 +++-- .../indexing/indexers/basic_indexer_spec.rb | 12 ++++-- 9 files changed, 80 insertions(+), 29 deletions(-) diff --git a/app/jobs/reindex_job.rb b/app/jobs/reindex_job.rb index d02284fa5..6b4edbe98 100644 --- a/app/jobs/reindex_job.rb +++ b/app/jobs/reindex_job.rb @@ -7,12 +7,13 @@ class ReindexJob < ApplicationJob # @param [Hash] model the cocina object attributes (without metadata) # @param [DateTime] created the time the object was created # @param [DateTime] modified the time the object was last modified - def perform(model:, created:, modified:) + # @param [String] trace_id the trace id + def perform(model:, created:, modified:, trace_id:) cocina_object = Cocina::Models.build(model) cocina_object_with_metadata = Cocina::Models.with_metadata(cocina_object, 'void', created:, modified:) - Indexer.reindex(cocina_object: cocina_object_with_metadata) + Indexer.reindex(cocina_object: cocina_object_with_metadata, trace_id: trace_id) rescue CocinaObjectStore::CocinaObjectStoreError => e - Rails.logger.error("Error reindexing #{model[:externalIdentifier]}: #{e.message}") - Honeybadger.notify(e, context: { druid: model[:externalIdentifier] }) + Rails.logger.error("Error reindexing #{model[:externalIdentifier]} - trace_id #{trace_id}: #{e.message}") + Honeybadger.notify(e, context: { druid: model[:externalIdentifier], trace_id: }) end end diff --git a/app/services/indexer.rb b/app/services/indexer.rb index 103e105a1..be7b59500 100644 --- a/app/services/indexer.rb +++ b/app/services/indexer.rb @@ -3,11 +3,12 @@ # Indexes a Cocina object. class Indexer # @param [Cocina::Models::DROWithMetadata|CollectionWithMetadata|AdminPolicyWithMetadata] - def self.reindex(cocina_object:) + def self.reindex(cocina_object:, trace_id: nil) return unless Settings.solr.enabled solr_doc = Indexing::Builders::DocumentBuilder.for( - model: cocina_object + model: cocina_object, + trace_id: trace_id || trace_id_for(druid: cocina_object.externalIdentifier) ).to_solr solr.add(solr_doc) solr.commit @@ -21,11 +22,12 @@ def self.delete(druid:) end # @param [Cocina::Models::DROWithMetadata|CollectionWithMetadata|AdminPolicyWithMetadata] - def self.reindex_later(cocina_object:) + def self.reindex_later(cocina_object:, trace_id: nil) ReindexJob.perform_later( model: cocina_object.to_h, created: cocina_object.created, - modified: cocina_object.modified + modified: cocina_object.modified, + trace_id: trace_id || trace_id_for(druid: cocina_object.externalIdentifier) ) end @@ -33,4 +35,11 @@ def self.solr RSolr.connect(timeout: 120, open_timeout: 120, url: Settings.solr.url) end private_class_method :solr + + def self.trace_id_for(druid:) + source = Kernel.caller_locations(2, 1).first.to_s.delete_prefix("#{Rails.root}/") # rubocop:disable Rails/FilePath + SecureRandom.uuid.tap do |trace_id| + Rails.logger.info("Reindexing #{druid} from #{source} with trace_id=#{trace_id}") + end + end end diff --git a/app/services/indexing/builders/document_builder.rb b/app/services/indexing/builders/document_builder.rb index ff20acca9..5eee73907 100644 --- a/app/services/indexing/builders/document_builder.rb +++ b/app/services/indexing/builders/document_builder.rb @@ -56,8 +56,9 @@ def self.reset_parent_collections @@parent_collections = {} # rubocop:disable Style/ClassVars end - def initialize(model:) + def initialize(model:, trace_id: SecureRandom.uuid) @model = model + @trace_id = trace_id end # @param [Cocina::Models::DROWithMetadata,Cocina::Models::CollectionWithMetadata,Cocina::Model::AdminPolicyWithMetadata] model @@ -65,7 +66,8 @@ def for indexer_for_type(model.type).new(id:, cocina: model, parent_collections:, - administrative_tags:) + administrative_tags:, + trace_id:) rescue StandardError => e Honeybadger.notify('[DATA ERROR] Unexpected indexing exception', tags: 'data_error', @@ -77,7 +79,7 @@ def for private - attr_reader :model, :workflow_client + attr_reader :model, :workflow_client, :trace_id def id model.externalIdentifier diff --git a/app/services/indexing/indexers/basic_indexer.rb b/app/services/indexing/indexers/basic_indexer.rb index 9917b19f0..d01dd92f6 100644 --- a/app/services/indexing/indexers/basic_indexer.rb +++ b/app/services/indexing/indexers/basic_indexer.rb @@ -4,10 +4,11 @@ module Indexing module Indexers # Basic indexing for any object class BasicIndexer - attr_reader :cocina, :workflow_client + attr_reader :cocina, :workflow_client, :trace_id - def initialize(cocina:, **) + def initialize(cocina:, trace_id:, **) @cocina = cocina + @trace_id = trace_id @workflow_client = workflow_client end @@ -15,6 +16,7 @@ def initialize(cocina:, **) def to_solr {}.tap do |solr_doc| solr_doc[:id] = cocina.externalIdentifier + solr_doc['trace_id_ss'] = trace_id solr_doc['current_version_isi'] = cocina.version # Argo Facet field "Version" solr_doc['obj_label_tesim'] = cocina.label diff --git a/bin/rolling_index b/bin/rolling_index index 738c08f67..bbd6a211f 100755 --- a/bin/rolling_index +++ b/bin/rolling_index @@ -43,7 +43,8 @@ Daemons.run_proc( cocina_object = CocinaObjectStore.find(identifier) # This returns a Solr doc hash Indexing::Builders::DocumentBuilder.for( - model: cocina_object + model: cocina_object, + trace_id: Indexer.trace_id_for(druid: identifier) ).to_solr rescue CocinaObjectStore::CocinaObjectNotFoundError Honeybadger.notify('Rolling indexer cannot reindex since not found.', { druid: identifier }) diff --git a/spec/jobs/reindex_job_spec.rb b/spec/jobs/reindex_job_spec.rb index 4b249625b..b57c3a2d3 100644 --- a/spec/jobs/reindex_job_spec.rb +++ b/spec/jobs/reindex_job_spec.rb @@ -4,10 +4,11 @@ RSpec.describe ReindexJob do subject(:perform) do - described_class.perform_now(model: dro.to_h, created: Time.zone.now, modified: Time.zone.now) + described_class.perform_now(model: dro.to_h, created: Time.zone.now, modified: Time.zone.now, trace_id:) end let(:dro) { build(:dro) } + let(:trace_id) { 'abc123' } context 'when no errors' do before do @@ -16,7 +17,7 @@ it 'invokes the Indexer' do perform - expect(Indexer).to have_received(:reindex).with(cocina_object: an_instance_of(Cocina::Models::DROWithMetadata)) + expect(Indexer).to have_received(:reindex).with(cocina_object: an_instance_of(Cocina::Models::DROWithMetadata), trace_id:) end end @@ -28,7 +29,7 @@ it 'Honeybadger alerts' do perform - expect(Indexer).to have_received(:reindex).with(cocina_object: an_instance_of(Cocina::Models::DROWithMetadata)) + expect(Indexer).to have_received(:reindex).with(cocina_object: an_instance_of(Cocina::Models::DROWithMetadata), trace_id:) expect(Honeybadger).to have_received(:notify) end end diff --git a/spec/services/indexer_spec.rb b/spec/services/indexer_spec.rb index 2bf9329c7..6596cac02 100644 --- a/spec/services/indexer_spec.rb +++ b/spec/services/indexer_spec.rb @@ -9,9 +9,12 @@ let(:indexer) { double(to_solr: solr_doc) } # rubocop:disable RSpec/VerifiedDoubles let(:solr_doc) { instance_double(Hash) } let(:solr) { instance_double(RSolr::Client, add: nil, commit: nil, delete_by_id: nil) } + let(:trace_id) { 'abc123' } + let(:generated_trace_id) { 'def456' } before do allow(RSolr).to receive(:connect).and_return(solr) + allow(SecureRandom).to receive(:uuid).and_return(generated_trace_id) end describe '#reindex' do @@ -20,9 +23,10 @@ end it 'reindexes the object' do - described_class.reindex(cocina_object:) + described_class.reindex(cocina_object:, trace_id:) expect(Indexing::Builders::DocumentBuilder).to have_received(:for).with( - model: cocina_object + model: cocina_object, + trace_id: ) expect(solr).to have_received(:add).with(solr_doc) expect(solr).to have_received(:commit) @@ -34,12 +38,22 @@ end it 'does not reindex the object' do - described_class.reindex(cocina_object:) + described_class.reindex(cocina_object:, trace_id:) expect(Indexing::Builders::DocumentBuilder).not_to have_received(:for) expect(solr).not_to have_received(:add) expect(solr).not_to have_received(:commit) end end + + context 'when trace_id is not provided' do + it 'generates a trace_id' do + described_class.reindex(cocina_object:) + expect(Indexing::Builders::DocumentBuilder).to have_received(:for).with( + model: cocina_object, + trace_id: generated_trace_id + ) + end + end end describe '#delete' do @@ -68,12 +82,25 @@ end it 'reindexes the object later' do - described_class.reindex_later(cocina_object:) + described_class.reindex_later(cocina_object:, trace_id:) expect(ReindexJob).to have_received(:perform_later).with( model: cocina_object.to_h, created: cocina_object.created, - modified: cocina_object.modified + modified: cocina_object.modified, + trace_id: ) end + + context 'when trace_id is not provided' do + it 'generates a trace_id' do + described_class.reindex_later(cocina_object:) + expect(ReindexJob).to have_received(:perform_later).with( + model: cocina_object.to_h, + created: cocina_object.created, + modified: cocina_object.modified, + trace_id: generated_trace_id + ) + end + end end end diff --git a/spec/services/indexing/builders/document_builder_spec.rb b/spec/services/indexing/builders/document_builder_spec.rb index f156a3630..ba9c5d927 100644 --- a/spec/services/indexing/builders/document_builder_spec.rb +++ b/spec/services/indexing/builders/document_builder_spec.rb @@ -4,7 +4,7 @@ RSpec.describe Indexing::Builders::DocumentBuilder do subject(:indexer) do - described_class.for(model: cocina_with_metadata) + described_class.for(model: cocina_with_metadata, trace_id:) end let(:cocina_with_metadata) do @@ -12,6 +12,8 @@ modified: DateTime.parse('Thu, 04 Mar 2021 23:05:34 GMT')) end let(:druid) { 'druid:xx999xx9999' } + let(:trace_id) { 'abc123' } + let(:releasable) do instance_double(Indexing::Indexers::ReleasableIndexer, to_solr: { 'released_to_ssim' => %w[searchworks earthworks] }) end @@ -68,7 +70,8 @@ before do allow(CocinaObjectStore).to receive(:find).and_return(related) described_class.for( - model: cocina_with_metadata + model: cocina_with_metadata, + trace_id: ) end @@ -94,7 +97,8 @@ .with(cocina: Cocina::Models::DROWithMetadata, id: String, administrative_tags: [], - parent_collections: []) + parent_collections: [], + trace_id:) expect(Honeybadger).to have_received(:notify).with('Bad association found on druid:xx999xx9999. druid:bc999df2323 could not be found') end end diff --git a/spec/services/indexing/indexers/basic_indexer_spec.rb b/spec/services/indexing/indexers/basic_indexer_spec.rb index 9510e4a8e..46902800b 100644 --- a/spec/services/indexing/indexers/basic_indexer_spec.rb +++ b/spec/services/indexing/indexers/basic_indexer_spec.rb @@ -23,9 +23,10 @@ let(:indexer) do Indexing::Indexers::CompositeIndexer.new( described_class - ).new(id: 'druid:ab123cd4567', cocina:, workflow_client: instance_double(Dor::Workflow::Client)) + ).new(id: 'druid:ab123cd4567', cocina:, workflow_client: instance_double(Dor::Workflow::Client), trace_id:) end let(:doc) { indexer.to_solr } + let(:trace_id) { 'abc123' } context 'with collections' do let(:structural) do @@ -42,7 +43,8 @@ 'is_member_of_collection_ssim' => ['info:fedora/druid:bb777bb7777', 'info:fedora/druid:dd666dd6666'], 'modified_latest_dttsi' => '2021-03-04T23:05:34Z', 'created_at_dttsi' => '2020-01-01T12:00:01Z', - 'id' => 'druid:xx999xx9999' + 'id' => 'druid:xx999xx9999', + 'trace_id_ss' => 'abc123' ) end end @@ -62,7 +64,8 @@ 'has_constituents_ssim' => nil, 'modified_latest_dttsi' => '2021-03-04T23:05:34Z', 'created_at_dttsi' => '2020-01-01T12:00:01Z', - 'id' => 'druid:xx999xx9999' + 'id' => 'druid:xx999xx9999', + 'trace_id_ss' => 'abc123' ) end end @@ -82,7 +85,8 @@ 'is_member_of_collection_ssim' => [], 'modified_latest_dttsi' => '2021-03-04T23:05:34Z', 'created_at_dttsi' => '2020-01-01T12:00:01Z', - 'id' => 'druid:xx999xx9999' + 'id' => 'druid:xx999xx9999', + 'trace_id_ss' => 'abc123' ) end end