From a3df08341cb757a9bd74c8024307b23a197c8405 Mon Sep 17 00:00:00 2001 From: cjcolvar Date: Mon, 3 Jun 2024 16:33:20 -0400 Subject: [PATCH 1/3] Make media object json response consistent between SpeedyAF proxy and ActiveFedora object --- app/models/media_object.rb | 4 ++-- app/models/mods_behaviors.rb | 9 +++++++-- app/presenters/speedy_af/proxy/media_object.rb | 8 ++++++++ config/initializers/presenter_config.rb | 3 ++- spec/controllers/media_objects_controller_spec.rb | 4 +++- spec/factories/media_objects.rb | 6 ++++++ 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/app/models/media_object.rb b/app/models/media_object.rb index 668efa0009..8d31885aa8 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -318,8 +318,8 @@ def to_solr(include_child_fields: false) solr_doc["title_ssort"] = self.title solr_doc["creator_ssort"] = Array(self.creator).join(', ') solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present? - solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize) - solr_doc['identifier_ssim'] = self.identifier.map(&:downcase) + solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type + solr_doc['identifier_ssim'] = self.identifier solr_doc['note_ssm'] = self.note.collect { |n| n.to_json } solr_doc['other_identifier_ssm'] = self.other_identifier.collect { |oi| oi.to_json } solr_doc['related_item_url_ssm'] = self.related_item_url.collect { |r| r.to_json } diff --git a/app/models/mods_behaviors.rb b/app/models/mods_behaviors.rb index e29467e80e..1996d13de7 100644 --- a/app/models/mods_behaviors.rb +++ b/app/models/mods_behaviors.rb @@ -42,7 +42,7 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['abstract_ssi'] = self.find_by_terms(:abstract).text solr_doc['publisher_ssim'] = gather_terms(self.find_by_terms(:publisher)) solr_doc['contributor_ssim'] = gather_terms(self.find_by_terms(:contributor)) - solr_doc['subject_ssim'] = 
gather_terms(self.find_by_terms(:subject)) + solr_doc['subject_ssim'] = gather_terms(self.find_by_terms(:topical_subject)) solr_doc['genre_ssim'] = gather_terms(self.find_by_terms(:genre)) # solr_doc['physical_dtl_sim'] = gather_terms(self.find_by_terms(:format)) # solr_doc['contents_sim'] = gather_terms(self.find_by_terms(:parts_list)) @@ -52,7 +52,7 @@ def to_solr(solr_doc = Hash.new, opts = {}) # solr_doc['collection_sim'] = gather_terms(self.find_by_terms(:archival_collection)) solr_doc['series_ssim'] = gather_terms(self.find_by_terms(:series)) #filter formats based upon whitelist - solr_doc['resource_type_ssim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize) + solr_doc['resource_type_ssim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]) solr_doc['location_ssim'] = gather_terms(self.find_by_terms(:geographic_subject)) # Blacklight facets - these are the same facet fields used in our Blacklight app @@ -78,6 +78,11 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['terms_of_use_ssi'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text solr_doc['rights_statement_ssi'] = self.find_by_terms(:rights_statement).text solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier)) + solr_doc['bibliographic_id_ssi'] = self.bibliographic_id.first + solr_doc['bibliographic_id_source_ssi'] = self.bibliographic_id.source.first + solr_doc['uniform_title_ssim'] = gather_terms(self.find_by_terms(:uniform_title)) + solr_doc['statement_of_responsibility_ssi'] = gather_terms(self.find_by_terms(:statement_of_responsibility)) + solr_doc['record_identifier_ssim'] = gather_terms(self.find_by_terms(:record_identifier)) # Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight solr_doc['date_issued_ssi'] = self.find_by_terms(:date_issued).text diff --git 
a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index cf84fe2fce..308d1ddd4a 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -132,6 +132,14 @@ def language attrs[:language_code].present? ? attrs[:language_code].map { |code| { code: code, text: LanguageTerm.find(code).text } } : [] end + def bibliographic_id + if attrs[:bibliographic_id].present? && attrs[:bibliographic_id_source].present? + { id: attrs[:bibliographic_id], source: attrs[:bibliographic_id_source] } + else + nil + end + end + def sections_with_files(tag: '*') sections.select { |master_file| master_file.supplemental_files(tag: tag).present? }.map(&:id) end diff --git a/config/initializers/presenter_config.rb b/config/initializers/presenter_config.rb index b238467210..9aa9e49763 100644 --- a/config/initializers/presenter_config.rb +++ b/config/initializers/presenter_config.rb @@ -60,7 +60,8 @@ uniform_title: [], resource_type: [], record_identifier: [], - series: [] + series: [], + format: [] } include VirtualGroups include MediaObjectIntercom diff --git a/spec/controllers/media_objects_controller_spec.rb b/spec/controllers/media_objects_controller_spec.rb index 7369ccf240..6fb35eb498 100644 --- a/spec/controllers/media_objects_controller_spec.rb +++ b/spec/controllers/media_objects_controller_spec.rb @@ -1181,7 +1181,7 @@ context "with json format" do subject(:json) { JSON.parse(response.body) } let(:administrator) { FactoryBot.create(:administrator) } - let!(:media_object) { FactoryBot.create(:media_object) } + let!(:media_object) { FactoryBot.create(:fully_searchable_media_object) } let!(:master_file) { FactoryBot.create(:master_file, :with_derivative, media_object: media_object) } before do @@ -1190,6 +1190,8 @@ end it "should return json for specific media_object" do + # Run indexing job to ensure object isn't reified in this request + perform_enqueued_jobs(only: 
MediaObjectIndexingJob) get 'show', params: { id: media_object.id, format:'json' } expect(json['id']).to eq(media_object.id) expect(json['title']).to eq(media_object.title) diff --git a/spec/factories/media_objects.rb b/spec/factories/media_objects.rb index f023a97e04..33dc032b92 100644 --- a/spec/factories/media_objects.rb +++ b/spec/factories/media_objects.rb @@ -50,6 +50,12 @@ terms_of_use { [ 'Terms of Use: Be kind. Rewind.' ] } series { [Faker::Lorem.word] } sections { [] } + statement_of_responsibility { Faker::Lorem.word } + uniform_title { [Faker::Lorem.sentence] } + identifier { [Faker::Alphanumeric.alphanumeric(number: 8, min_alpha: 1, min_numeric: 1).downcase, + Faker::Alphanumeric.alphanumeric(number: 8, min_alpha: 1, min_numeric: 1).upcase, + Faker::Barcode.isbn] } + resource_type { ['moving image'] } # after(:create) do |mo| # mo.update_datastream(:descMetadata, { # note: {note[Faker::Lorem.paragraph], From ac67722a88e3b83986f31ada43fec556f15ca186 Mon Sep 17 00:00:00 2001 From: cjcolvar Date: Wed, 5 Jun 2024 16:10:01 -0400 Subject: [PATCH 2/3] Index other fields missing from MODS --- app/models/concerns/media_object_mods.rb | 2 +- app/models/mods_behaviors.rb | 6 ++++-- spec/controllers/media_objects_controller_spec.rb | 13 ++----------- spec/factories/media_objects.rb | 10 ++++++++-- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/app/models/concerns/media_object_mods.rb b/app/models/concerns/media_object_mods.rb index a1ce122db4..17261d173a 100644 --- a/app/models/concerns/media_object_mods.rb +++ b/app/models/concerns/media_object_mods.rb @@ -90,7 +90,7 @@ def alternative_title=(value) # has_attributes :translated_title, datastream: :descMetadata, at: [:translated_title], multiple: true def translated_title - descMetadata.alternative_title + descMetadata.translated_title end def translated_title=(value) diff --git a/app/models/mods_behaviors.rb b/app/models/mods_behaviors.rb index 1996d13de7..af82ebba13 100644 --- 
a/app/models/mods_behaviors.rb +++ b/app/models/mods_behaviors.rb @@ -34,7 +34,9 @@ def to_solr(solr_doc = Hash.new, opts = {}) end solr_doc['title_addl_sim'] = gather_terms(addl_titles) solr_doc['heading_sim'] = self.find_by_terms(:main_title).text - + solr_doc['uniform_title_ssim'] = gather_terms(self.find_by_terms(:uniform_title)) + solr_doc['alternative_title_ssim'] = gather_terms(self.find_by_terms(:alternative_title)) + solr_doc['translated_title_ssim'] = gather_terms(self.find_by_terms(:translated_title)) solr_doc['creator_ssim'] = gather_terms(self.find_by_terms(:creator)) # solr_doc['creator_ssi'] = self.find_by_terms(:creator).text @@ -80,13 +82,13 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier)) solr_doc['bibliographic_id_ssi'] = self.bibliographic_id.first solr_doc['bibliographic_id_source_ssi'] = self.bibliographic_id.source.first - solr_doc['uniform_title_ssim'] = gather_terms(self.find_by_terms(:uniform_title)) solr_doc['statement_of_responsibility_ssi'] = gather_terms(self.find_by_terms(:statement_of_responsibility)) solr_doc['record_identifier_ssim'] = gather_terms(self.find_by_terms(:record_identifier)) # Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight solr_doc['date_issued_ssi'] = self.find_by_terms(:date_issued).text solr_doc['date_created_ssi'] = self.find_by_terms(:date_created).text + solr_doc['copyright_date_ssi'] = self.find_by_terms(:copyright_date).text # Put both publication date and creation date into the date facet solr_doc['date_sim'] = gather_years(solr_doc['date_issued_ssi']) solr_doc['date_sim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present? 
diff --git a/spec/controllers/media_objects_controller_spec.rb b/spec/controllers/media_objects_controller_spec.rb index 6fb35eb498..ac5c550473 100644 --- a/spec/controllers/media_objects_controller_spec.rb +++ b/spec/controllers/media_objects_controller_spec.rb @@ -1181,7 +1181,7 @@ context "with json format" do subject(:json) { JSON.parse(response.body) } let(:administrator) { FactoryBot.create(:administrator) } - let!(:media_object) { FactoryBot.create(:fully_searchable_media_object) } + let!(:media_object) { FactoryBot.create(:all_fields_media_object) } let!(:master_file) { FactoryBot.create(:master_file, :with_derivative, media_object: media_object) } before do @@ -1203,17 +1203,8 @@ expect(json['published']).to eq(media_object.published?) expect(json['summary']).to eq(media_object.abstract) - # FIXME: https://github.com/avalonmediasystem/avalon/issues/5834 ingest_api_hash = media_object.to_ingest_api_hash(false) - json['fields'].each do |k,v| - if k == "avalon_resource_type" - expect(v.map(&:downcase)).to eq(ingest_api_hash[:fields][k.to_sym]) - elsif k == "record_identifier" - # no-op since not indexed - else - expect(v).to eq(ingest_api_hash[:fields][k.to_sym]) - end - end + json['fields'].each { |k,v| expect(v).to eq(ingest_api_hash[:fields][k.to_sym]) } # Symbolize keys for master files and derivatives json['files'].each do |mf| diff --git a/spec/factories/media_objects.rb b/spec/factories/media_objects.rb index 33dc032b92..901d722386 100644 --- a/spec/factories/media_objects.rb +++ b/spec/factories/media_objects.rb @@ -50,12 +50,11 @@ terms_of_use { [ 'Terms of Use: Be kind. Rewind.' 
] } series { [Faker::Lorem.word] } sections { [] } - statement_of_responsibility { Faker::Lorem.word } - uniform_title { [Faker::Lorem.sentence] } identifier { [Faker::Alphanumeric.alphanumeric(number: 8, min_alpha: 1, min_numeric: 1).downcase, Faker::Alphanumeric.alphanumeric(number: 8, min_alpha: 1, min_numeric: 1).upcase, Faker::Barcode.isbn] } resource_type { ['moving image'] } + statement_of_responsibility { Faker::Lorem.word } # after(:create) do |mo| # mo.update_datastream(:descMetadata, { # note: {note[Faker::Lorem.paragraph], @@ -66,6 +65,13 @@ # }) # mo.save # end + + factory :all_fields_media_object do + uniform_title { [Faker::Lorem.sentence] } + alternative_title { [Faker::Lorem.sentence] } + translated_title { [Faker::Lorem.sentence] } + copyright_date { '2011' } + end end end trait :with_master_file do From a169054052613173749b3569c1c17792333bda04 Mon Sep 17 00:00:00 2001 From: cjcolvar Date: Mon, 10 Jun 2024 14:33:46 -0400 Subject: [PATCH 3/3] Accept known issue of identifiers being lower-cased in JSON responses --- app/models/media_object.rb | 3 ++- spec/controllers/media_objects_controller_spec.rb | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/app/models/media_object.rb b/app/models/media_object.rb index 8d31885aa8..84767ea07b 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -319,7 +319,8 @@ def to_solr(include_child_fields: false) solr_doc["creator_ssort"] = Array(self.creator).join(', ') solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present? 
solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type - solr_doc['identifier_ssim'] = self.identifier + # Downcasing identifier allows for case-insensitive searching but has the side effect of causing all identifiers to be lower case in JSON responses + solr_doc['identifier_ssim'] = self.identifier.map(&:downcase) solr_doc['note_ssm'] = self.note.collect { |n| n.to_json } solr_doc['other_identifier_ssm'] = self.other_identifier.collect { |oi| oi.to_json } solr_doc['related_item_url_ssm'] = self.related_item_url.collect { |r| r.to_json } diff --git a/spec/controllers/media_objects_controller_spec.rb b/spec/controllers/media_objects_controller_spec.rb index ac5c550473..d9ecc4bdfa 100644 --- a/spec/controllers/media_objects_controller_spec.rb +++ b/spec/controllers/media_objects_controller_spec.rb @@ -1204,7 +1204,14 @@ expect(json['summary']).to eq(media_object.abstract) ingest_api_hash = media_object.to_ingest_api_hash(false) - json['fields'].each { |k,v| expect(v).to eq(ingest_api_hash[:fields][k.to_sym]) } + json['fields'].each do |k,v| + # Known issue: identifiers are downcased when indexing to allow for case-insensitive searching + if k.to_sym == :identifier + expect(v).to eq(ingest_api_hash[:fields][k.to_sym].map(&:downcase)) + else + expect(v).to eq(ingest_api_hash[:fields][k.to_sym]) + end + end # Symbolize keys for master files and derivatives json['files'].each do |mf|