From a0826d94f1114f5030ca00da33066864761dce29 Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Tue, 17 Oct 2023 10:26:48 -0400 Subject: [PATCH 1/7] Adjust indexing so all MO metadata values get added to proxy --- app/models/iiif_manifest_presenter.rb | 4 +- app/models/media_object.rb | 36 +++++----- app/models/mods_behaviors.rb | 70 ++++++++++--------- .../speedy_af/proxy/media_object.rb | 22 +++++- spec/controllers/catalog_controller_spec.rb | 2 +- spec/models/media_object_spec.rb | 16 ++--- .../speedy_af/proxy/media_object_spec.rb | 36 +++++++++- 7 files changed, 120 insertions(+), 66 deletions(-) diff --git a/app/models/iiif_manifest_presenter.rb b/app/models/iiif_manifest_presenter.rb index ed053a9496..586abdfeb5 100644 --- a/app/models/iiif_manifest_presenter.rb +++ b/app/models/iiif_manifest_presenter.rb @@ -139,10 +139,12 @@ def display_unit(media_object) end def display_language(media_object) + return nil unless media_object.language.present? media_object.language.collect { |l| l[:text] }.uniq end def display_related_item(media_object) + return nil unless media_object.related_item_url.present? media_object.related_item_url.collect { |r| "#{r[:label]}" } end @@ -180,7 +182,7 @@ def iiif_metadata_fields metadata_field('Contributor', media_object.contributor), metadata_field('Publisher', media_object.publisher), metadata_field('Genre', media_object.genre), - metadata_field('Subject', media_object.subject), + metadata_field('Subject', media_object.topical_subject), metadata_field('Time period', media_object.temporal_subject), metadata_field('Location', media_object.geographic_subject), metadata_field('Collection', display_collection(media_object)), diff --git a/app/models/media_object.rb b/app/models/media_object.rb index 5c38be8751..23baebf6f6 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -234,11 +234,11 @@ def section_physical_descriptions def fill_in_solr_fields_that_need_master_files(solr_doc) solr_doc['section_id_ssim'] = ordered_master_file_ids - solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten - solr_doc["date_digitized_sim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" } + solr_doc["other_identifier_ssim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten + solr_doc["date_digitized_ssim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" } solr_doc["section_label_tesim"] = section_labels solr_doc['section_physical_description_ssim'] = section_physical_descriptions - solr_doc['all_comments_sim'] = all_comments + solr_doc['all_comments_ssim'] = all_comments end # Enqueue background job to do a full indexing including more costly fields that read from children @@ -257,7 +257,7 @@ def to_solr(include_child_fields: false) solr_doc[Hydra.config.permissions.read.group] += solr_doc['read_access_ip_group_ssim'] solr_doc["title_ssort"] = self.title solr_doc["creator_ssort"] = Array(self.creator).join(', ') - solr_doc["date_ingested_sim"] = self.create_date.strftime "%F" if self.create_date.present? + solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present? solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize) solr_doc['identifier_ssim'] = self.identifier.map(&:downcase) if include_child_fields @@ -265,29 +265,29 @@ def to_solr(include_child_fields: false) elsif id.present? # avoid error in test suite # Fill in other identifier so these values aren't stripped from the solr doc while waiting for the background job mf_docs = ActiveFedora::SolrService.query("isPartOf_ssim:#{id}", rows: 1_000_000) - solr_doc["other_identifier_sim"] += mf_docs.collect { |h| h['identifier_ssim'] }.flatten + solr_doc["other_identifier_ssim"] += mf_docs.collect { |h| h['identifier_ssim'] }.flatten end #Add all searchable fields to the all_text_timv field all_text_values = [] all_text_values << solr_doc["title_tesi"] all_text_values << solr_doc["creator_ssim"] - all_text_values << solr_doc["contributor_sim"] + all_text_values << solr_doc["contributor_ssim"] all_text_values << solr_doc["unit_ssim"] all_text_values << solr_doc["collection_ssim"] - all_text_values << solr_doc["summary_ssi"] - all_text_values << solr_doc["publisher_sim"] - all_text_values << solr_doc["subject_topic_sim"] - all_text_values << solr_doc["subject_geographic_sim"] - all_text_values << solr_doc["subject_temporal_sim"] - all_text_values << solr_doc["genre_sim"] - all_text_values << solr_doc["language_sim"] - all_text_values << solr_doc["physical_description_sim"] + all_text_values << solr_doc["abstract_ssi"] + all_text_values << solr_doc["publisher_ssim"] + all_text_values << solr_doc["topical_subject_ssim"] + all_text_values << solr_doc["geographic_subject_ssim"] + all_text_values << solr_doc["temporal_subject_ssim"] + all_text_values << solr_doc["genre_ssim"] + all_text_values << solr_doc["language_ssim"] + all_text_values << solr_doc["physical_description_ssim"] all_text_values << solr_doc["series_ssim"] - all_text_values << solr_doc["date_sim"] - all_text_values << solr_doc["notes_sim"] - all_text_values << solr_doc["table_of_contents_sim"] - all_text_values << solr_doc["other_identifier_sim"] + all_text_values << solr_doc["date_ssim"] + all_text_values << solr_doc["note_ssim"] + all_text_values << solr_doc["table_of_contents_ssim"] + all_text_values << solr_doc["other_identifier_ssim"] solr_doc["all_text_timv"] = all_text_values.flatten solr_doc.each_pair { |k,v| solr_doc[k] = v.is_a?(Array) ? v.select { |e| e =~ /\S/ } : v } end diff --git a/app/models/mods_behaviors.rb b/app/models/mods_behaviors.rb index 8334dbd8f0..d6faf5630b 100644 --- a/app/models/mods_behaviors.rb +++ b/app/models/mods_behaviors.rb @@ -1,11 +1,11 @@ # Copyright 2011-2023, The Trustees of Indiana University and Northwestern # University. Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -# +# # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software distributed # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR # CONDITIONS OF ANY KIND, either express or implied. See the License for the @@ -39,52 +39,54 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['creator_ssim'] = gather_terms(self.find_by_terms(:creator)) # solr_doc['creator_ssi'] = self.find_by_terms(:creator).text # Individual fields - solr_doc['summary_ssi'] = self.find_by_terms(:abstract).text - solr_doc['publisher_sim'] = gather_terms(self.find_by_terms(:publisher)) - solr_doc['contributor_sim'] = gather_terms(self.find_by_terms(:contributor)) - solr_doc['subject_sim'] = gather_terms(self.find_by_terms(:subject)) - solr_doc['genre_sim'] = gather_terms(self.find_by_terms(:genre)) + solr_doc['abstract_ssi'] = self.find_by_terms(:abstract).text + solr_doc['publisher_ssim'] = gather_terms(self.find_by_terms(:publisher)) + solr_doc['contributor_ssim'] = gather_terms(self.find_by_terms(:contributor)) + solr_doc['subject_ssim'] = gather_terms(self.find_by_terms(:subject)) + solr_doc['genre_ssim'] = gather_terms(self.find_by_terms(:genre)) # solr_doc['physical_dtl_sim'] = gather_terms(self.find_by_terms(:format)) # solr_doc['contents_sim'] = gather_terms(self.find_by_terms(:parts_list)) - solr_doc['notes_sim'] = gather_terms(self.find_by_terms(:note)) - solr_doc['table_of_contents_sim'] = gather_terms(self.find_by_terms(:table_of_contents)) - solr_doc['access_sim'] = gather_terms(self.find_by_terms(:usage)) + solr_doc['note_ssim'] = gather_terms(self.find_by_terms(:note)) + solr_doc['note_type_ssm'] = gather_attribute(self.find_by_terms(:note), 'type') + solr_doc['table_of_contents_ssim'] = gather_terms(self.find_by_terms(:table_of_contents)) + solr_doc['usage_ssim'] = gather_terms(self.find_by_terms(:usage)) # solr_doc['collection_sim'] = gather_terms(self.find_by_terms(:archival_collection)) solr_doc['series_ssim'] = gather_terms(self.find_by_terms(:series)) #filter formats based upon whitelist - solr_doc['format_sim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize) - solr_doc['location_sim'] = gather_terms(self.find_by_terms(:geographic_subject)) + solr_doc['resource_type_ssim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize) + solr_doc['location_ssim'] = gather_terms(self.find_by_terms(:geographic_subject)) # Blacklight facets - these are the same facet fields used in our Blacklight app # for consistency and so they'll show up when we export records from Hydra into BL: - solr_doc['material_sim'] = "Digital" - solr_doc['subject_topic_sim'] = gather_terms(self.find_by_terms(:topical_subject)) - solr_doc['subject_geographic_sim'] = gather_terms(self.find_by_terms(:geographic_subject)) - solr_doc['subject_temporal_sim'] = gather_terms(self.find_by_terms(:temporal_subject)) - solr_doc['subject_occupation_sim'] = gather_terms(self.find_by_terms(:occupation_subject)) - solr_doc['subject_person_sim'] = gather_terms(self.find_by_terms(:person_subject)) - solr_doc['subject_corporate_sim'] = gather_terms(self.find_by_terms(:corporate_subject)) - solr_doc['subject_family_sim'] = gather_terms(self.find_by_terms(:family_subject)) - solr_doc['subject_title_sim'] = gather_terms(self.find_by_terms(:title_subject)) - solr_doc['time_sim'] = gather_terms(self.find_by_terms(:temporal_subject)) + solr_doc['material_ssim'] = "Digital" + solr_doc['topical_subject_ssim'] = gather_terms(self.find_by_terms(:topical_subject)) + solr_doc['geographic_subject_ssim'] = gather_terms(self.find_by_terms(:geographic_subject)) + solr_doc['temporal_subject_ssim'] = gather_terms(self.find_by_terms(:temporal_subject)) + solr_doc['occupation_subject_ssim'] = gather_terms(self.find_by_terms(:occupation_subject)) + solr_doc['person_subject_ssim'] = gather_terms(self.find_by_terms(:person_subject)) + solr_doc['corporate_subject_ssim'] = gather_terms(self.find_by_terms(:corporate_subject)) + solr_doc['family_subject_ssim'] = gather_terms(self.find_by_terms(:family_subject)) + solr_doc['title_subject_ssim'] = gather_terms(self.find_by_terms(:title_subject)) + solr_doc['time_ssim'] = gather_terms(self.find_by_terms(:temporal_subject)) # TODO: map PBcore's three-letter language codes to full language names # Right now, everything's English. - solr_doc['language_sim'] = gather_terms(self.find_by_terms(:language_text)) - solr_doc['language_code_sim'] = gather_terms(self.find_by_terms(:language_code)) - solr_doc['physical_description_sim'] = gather_terms(self.find_by_terms(:physical_description)) - solr_doc['related_item_url_sim'] = gather_terms(self.find_by_terms(:related_item_url)) - solr_doc['related_item_label_sim'] = gather_terms(self.find_by_terms(:related_item_label)) - solr_doc['terms_of_use_si'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text + solr_doc['language_ssim'] = gather_terms(self.find_by_terms(:language_text)) + solr_doc['language_code_ssim'] = gather_terms(self.find_by_terms(:language_code)) + solr_doc['physical_description_ssim'] = gather_terms(self.find_by_terms(:physical_description)) + solr_doc['related_item_url_ssim'] = gather_terms(self.find_by_terms(:related_item_url)) + solr_doc['related_item_label_ssim'] = gather_terms(self.find_by_terms(:related_item_label)) + solr_doc['terms_of_use_ssi'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text solr_doc['rights_statement_ssi'] = self.find_by_terms(:rights_statement).text - solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier)) + solr_doc['other_identifier_ssim'] = gather_terms(self.find_by_terms(:other_identifier)) + solr_doc['other_identifier_type_ssm'] = gather_attribute(self.find_by_terms(:other_identifier), 'type') # Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight solr_doc['date_ssi'] = self.find_by_terms(:date_issued).text solr_doc['date_created_ssi'] = self.find_by_terms(:date_created).text # Put both publication date and creation date into the date facet - solr_doc['date_sim'] = gather_years(solr_doc['date_ssi']) - solr_doc['date_sim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present? + solr_doc['date_ssim'] = gather_years(solr_doc['date_ssi']) + solr_doc['date_ssim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present? # For full text, we stuff it into the mods_tesim field which is already configured for Mods doucments solr_doc['mods_tesim'] = self.ng_xml.xpath('//text()').collect { |t| t.text } @@ -185,6 +187,10 @@ def gather_terms(terms) terms.collect { |r| r.text }.compact.uniq end + def gather_attribute(terms, attribute) + terms.collect { |t| t.attribute(attribute).value } + end + def gather_years(date) parsed = Date.edtf(date) return Array.new if parsed.nil? diff --git a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index 6d6bada58d..7dbad250d4 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -1,11 +1,11 @@ # Copyright 2011-2023, The Trustees of Indiana University and Northwestern # University. Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -# +# # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software distributed # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR # CONDITIONS OF ANY KIND, either express or implied. See the License for the @@ -140,6 +140,22 @@ def governing_policies @governing_policies ||= Array(attrs[:isGovernedBy]).collect { |id| SpeedyAF::Base.find(id) } end + def language + attrs[:language_code].map { |code| { code: code, text: LanguageTerm.find(code).text } } if attrs[:language_code].present? + end + + def note + attrs[:note].map.with_index { |n, i| { note: n, type: attrs[:note_type][i] } } if attrs[:note].present? + end + + def other_identifier + attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } } if attrs[:other_identifier].present? + end + + def related_item_url + attrs[:related_item_url].map.with_index { |url, i| { url: url, label: attrs[:related_item_label][i] } } if attrs[:related_item_url].present? + end + protected # Overrides from SpeedyAF::Base diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 0f735b9c34..03ef36a728 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -172,7 +172,7 @@ describe "search fields" do let(:media_object) { FactoryBot.create(:fully_searchable_media_object) } - ["title_tesi", "creator_ssim", "contributor_sim", "unit_ssim", "collection_ssim", "summary_ssi", "publisher_sim", "subject_topic_sim", "subject_geographic_sim", "subject_temporal_sim", "genre_sim", "physical_description_sim", "language_sim", "date_sim", "notes_sim", "table_of_contents_sim", "other_identifier_sim", "series_ssim" ].each do |field| + ["title_tesi", "creator_ssim", "contributor_ssim", "unit_ssim", "collection_ssim", "abstract_ssi", "publisher_ssim", "topical_subject_ssim", "geographic_subject_ssim", "temporal_subject_ssim", "genre_ssim", "physical_description_ssim", "language_ssim", "date_ssim", "note_ssim", "table_of_contents_ssim", "other_identifier_ssim", "series_ssim" ].each do |field| it "should find results based upon #{field}" do query = Array(media_object.to_solr[field]).first #split on ' ' and only search on the first word of a multiword field value diff --git a/spec/models/media_object_spec.rb b/spec/models/media_object_spec.rb index 3bb0df419e..46d56155b5 100644 --- a/spec/models/media_object_spec.rb +++ b/spec/models/media_object_spec.rb @@ -593,20 +593,20 @@ end it 'should not index any unknown resource types' do media_object.resource_type = 'notated music' - expect(media_object.to_solr['format_sim']).not_to include 'Notated Music' + expect(media_object.to_solr['resource_type_ssim']).not_to include 'Notated Music' end it 'should index separate identifiers as separate values' do media_object.descMetadata.add_other_identifier('12345678','lccn') media_object.descMetadata.add_other_identifier('8675309 testing','local') solr_doc = media_object.to_solr - expect(solr_doc['other_identifier_sim']).to include('12345678','8675309 testing') - expect(solr_doc['other_identifier_sim']).not_to include('123456788675309 testing') + expect(solr_doc['other_identifier_ssim']).to include('12345678','8675309 testing') + expect(solr_doc['other_identifier_ssim']).not_to include('123456788675309 testing') end it 'should index identifier for master files' do master_file = FactoryBot.create(:master_file, identifier: ['TestOtherID'], media_object: media_object) media_object.reload solr_doc = media_object.to_solr(include_child_fields: true) - expect(solr_doc['other_identifier_sim']).to include('TestOtherID') + expect(solr_doc['other_identifier_ssim']).to include('TestOtherID') end it 'should index labels for master files' do FactoryBot.create(:master_file, :with_structure, media_object: media_object, title: 'Test Label') @@ -621,9 +621,9 @@ media_object.save! media_object.reload solr_doc = media_object.to_solr(include_child_fields: true) - expect(solr_doc['all_comments_sim']).to include('MO Comment') - expect(solr_doc['all_comments_sim']).to include('[Test Label] MF Comment 1') - expect(solr_doc['all_comments_sim']).to include('[Test Label] MF Comment 2') + expect(solr_doc['all_comments_ssim']).to include('MO Comment') + expect(solr_doc['all_comments_ssim']).to include('[Test Label] MF Comment 1') + expect(solr_doc['all_comments_ssim']).to include('[Test Label] MF Comment 2') end it 'includes virtual group leases in external group facet' do media_object.governing_policies += [FactoryBot.create(:lease, inherited_read_groups: ['TestGroup'])] @@ -932,7 +932,7 @@ it 'is indexed' do media_object.terms_of_use = terms_of_use_value - expect(media_object.to_solr["terms_of_use_si"]).to eq terms_of_use_value + expect(media_object.to_solr["terms_of_use_ssi"]).to eq terms_of_use_value end it 'roundtrips' do diff --git a/spec/presenters/speedy_af/proxy/media_object_spec.rb b/spec/presenters/speedy_af/proxy/media_object_spec.rb index e20bbdd944..b974a5f8b1 100644 --- a/spec/presenters/speedy_af/proxy/media_object_spec.rb +++ b/spec/presenters/speedy_af/proxy/media_object_spec.rb @@ -1,11 +1,11 @@ # Copyright 2011-2023, The Trustees of Indiana University and Northwestern # University. Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -# +# # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software distributed # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR # CONDITIONS OF ANY KIND, either express or implied. See the License for the @@ -37,4 +37,34 @@ end end end + + context "fully searchable media object proxy" do + let(:media_object) { FactoryBot.create(:fully_searchable_media_object) } + it "should include all metadata fields" do + expect(presenter.title).to eq media_object.title + expect(presenter.date_created).to eq media_object.date_created + expect(presenter.date_issued).to eq media_object.date_issued + expect(presenter.copyright_date).to eq media_object.copyright_date + expect(presenter.creator).to eq media_object.creator + expect(presenter.abstract).to eq media_object.abstract + expect(presenter.contributor).to eq media_object.contributor + expect(presenter.publisher).to eq media_object.publisher + expect(presenter.genre).to eq media_object.genre + expect(presenter.topical_subject).to eq media_object.topical_subject + expect(presenter.temporal_subject).to eq media_object.temporal_subject + expect(presenter.geographic_subject).to eq media_object.geographic_subject + expect(presenter.collection.id).to eq media_object.collection.id + expect(presenter.collection.unit).to eq media_object.collection.unit + expect(presenter.language).to eq media_object.language + expect(presenter.rights_statement).to eq media_object.rights_statement + expect(presenter.terms_of_use).to eq media_object.terms_of_use + expect(presenter.physical_description).to eq media_object.physical_description + expect(presenter.related_item_url).to eq media_object.related_item_url + expect(presenter.table_of_contents).to eq media_object.table_of_contents + expect(presenter.note).to eq media_object.note + expect(presenter.other_identifier).to eq media_object.other_identifier + expect(presenter.comment).to eq media_object.comment.to_a + expect(presenter.visibility).to eq media_object.visibility + end + end end From b2f395af42551c2bc1f95b59509aa909cfb9152d Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Tue, 17 Oct 2023 14:05:23 -0400 Subject: [PATCH 2/7] Add defaults to the proxy for new solr fields --- config/initializers/presenter_config.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/initializers/presenter_config.rb b/config/initializers/presenter_config.rb index d319e16be6..4200e4f3a6 100644 --- a/config/initializers/presenter_config.rb +++ b/config/initializers/presenter_config.rb @@ -43,7 +43,9 @@ physical_description: [], related_item_url: [], note: [], + note_type: [], other_identifier: [], + other_identifier_type: [], rights_statement: nil, table_of_contents: [], bibliographic_id: nil, From 08fc7ae10c393c6ac109d08e5b835d0efadfa0e2 Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Tue, 17 Oct 2023 14:22:41 -0400 Subject: [PATCH 3/7] Do not include sourceless other_identifiers in proxy The solr_doc includes the master file ids in the other identifier field. These ids are not included in the accessor on the media object. These ids also are of a different format than the media object's normal other_identifiers. We need to exclude the master file ids from the accessor on the proxy because of those factors. --- app/presenters/speedy_af/proxy/media_object.rb | 2 +- spec/presenters/speedy_af/proxy/media_object_spec.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index 7dbad250d4..6bb0625d01 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -149,7 +149,7 @@ def note end def other_identifier - attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } } if attrs[:other_identifier].present? + attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } if attrs[:other_identifier_type][i].present? }.compact if attrs[:other_identifier].present? end def related_item_url diff --git a/spec/presenters/speedy_af/proxy/media_object_spec.rb b/spec/presenters/speedy_af/proxy/media_object_spec.rb index b974a5f8b1..625d362feb 100644 --- a/spec/presenters/speedy_af/proxy/media_object_spec.rb +++ b/spec/presenters/speedy_af/proxy/media_object_spec.rb @@ -39,7 +39,7 @@ end context "fully searchable media object proxy" do - let(:media_object) { FactoryBot.create(:fully_searchable_media_object) } + let(:media_object) { FactoryBot.create(:fully_searchable_media_object, :with_master_file) } it "should include all metadata fields" do expect(presenter.title).to eq media_object.title expect(presenter.date_created).to eq media_object.date_created From 860245fb6e25ffd6e36df6c1a7315301f69cc6a9 Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Tue, 17 Oct 2023 15:24:06 -0400 Subject: [PATCH 4/7] Return [] instead of nil for proxy methods --- app/presenters/speedy_af/proxy/media_object.rb | 8 ++++---- config/initializers/presenter_config.rb | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index 6bb0625d01..8314add520 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -141,19 +141,19 @@ def governing_policies end def language - attrs[:language_code].map { |code| { code: code, text: LanguageTerm.find(code).text } } if attrs[:language_code].present? + attrs[:language_code].present? ? attrs[:language_code].map { |code| { code: code, text: LanguageTerm.find(code).text } } : [] end def note - attrs[:note].map.with_index { |n, i| { note: n, type: attrs[:note_type][i] } } if attrs[:note].present? + attrs[:note].present? ? attrs[:note].map.with_index { |n, i| { note: n, type: attrs[:note_type][i] } } : [] end def other_identifier - attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } if attrs[:other_identifier_type][i].present? }.compact if attrs[:other_identifier].present? + attrs[:other_identifier].present? ? attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } if attrs[:other_identifier_type][i].present? }.compact : [] end def related_item_url - attrs[:related_item_url].map.with_index { |url, i| { url: url, label: attrs[:related_item_label][i] } } if attrs[:related_item_url].present? + attrs[:related_item_url].present? ? attrs[:related_item_url].map.with_index { |url, i| { url: url, label: attrs[:related_item_label][i] } } : [] end protected diff --git a/config/initializers/presenter_config.rb b/config/initializers/presenter_config.rb index 4200e4f3a6..b849740229 100644 --- a/config/initializers/presenter_config.rb +++ b/config/initializers/presenter_config.rb @@ -39,6 +39,7 @@ topical_subject: [], geographic_subject: [], language: [], + language_code: [], terms_of_use: nil, physical_description: [], related_item_url: [], From 4bbb7092137fcacd2db26d61925e53dedb18f816 Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Mon, 6 Nov 2023 09:52:51 -0500 Subject: [PATCH 5/7] Rework handling of hash fields and fix catalog --- app/controllers/catalog_controller.rb | 32 +++--- app/models/media_object.rb | 11 +- app/models/mods_behaviors.rb | 10 +- .../speedy_af/proxy/media_object.rb | 17 +-- config/initializers/presenter_config.rb | 2 - spec/controllers/catalog_controller_spec.rb | 2 +- spec/features/capybara_catalog_spec.rb | 101 ++++++++++++++++++ spec/models/media_object_spec.rb | 6 +- 8 files changed, 137 insertions(+), 44 deletions(-) create mode 100644 spec/features/capybara_catalog_spec.rb diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index 1842aaa9d1..e33d2707e9 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -82,12 +82,12 @@ class CatalogController < ApplicationController # facet bar config.add_facet_field 'avalon_resource_type_ssim', label: 'Format', limit: 5, collapse: false config.add_facet_field 'creator_ssim', label: 'Main contributor', limit: 5 - config.add_facet_field 'date_sim', label: 'Date', limit: 5 - config.add_facet_field 'genre_sim', label: 'Genres', limit: 5 + config.add_facet_field 'date_ssim', label: 'Date', limit: 5 + config.add_facet_field 'genre_ssim', label: 'Genres', limit: 5 config.add_facet_field 'series_ssim', label: 'Series', limit: 5 config.add_facet_field 'collection_ssim', label: 'Collection', limit: 5 config.add_facet_field 'unit_ssim', label: 'Unit', limit: 5 - config.add_facet_field 'language_sim', label: 'Language', limit: 5 + config.add_facet_field 'language_ssim', label: 'Language', limit: 5 # Hide these facets if not a Collection Manager config.add_facet_field 'workflow_published_sim', label: 'Published', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow" config.add_facet_field 'avalon_uploader_ssi', label: 'Created by', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow" @@ -97,8 +97,8 @@ class CatalogController < ApplicationController private: { label: "Private", fq: "has_model_ssim:MediaObject AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_PUBLIC} AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_AUTHENTICATED}" } } config.add_facet_field 'read_access_virtual_group_ssim', label: 'External Group', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow", helper_method: :vgroup_display - config.add_facet_field 'date_digitized_sim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy' - config.add_facet_field 'date_ingested_sim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow" + config.add_facet_field 'date_digitized_ssim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy' + config.add_facet_field 'date_ingested_ssim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow" # Have BL send all facet field names to Solr, which has been the default # previously. Simply remove these lines if you'd rather use Solr request @@ -113,25 +113,25 @@ class CatalogController < ApplicationController config.add_index_field 'title_tesi', label: 'Title', if: Proc.new {|context, _field_config, _document| context.request.format == :json } config.add_index_field 'date_ssi', label: 'Date', helper_method: :combined_display_date config.add_index_field 'creator_ssim', label: 'Main contributors', helper_method: :contributor_index_display - config.add_index_field 'summary_ssi', label: 'Summary', helper_method: :description_index_display + config.add_index_field 'abstract_ssi', label: 'Summary', helper_method: :description_index_display config.add_index_field 'duration_ssi', label: 'Duration', if: Proc.new {|context, _field_config, _document| context.request.format == :json } config.add_index_field 'section_id_ssim', label: 'Sections', if: Proc.new {|context, _field_config, _document| context.request.format == :json }, helper_method: :section_id_json_index_display # solr fields to be displayed in the show (single result) view # The ordering of the field names is the order of the display config.add_show_field 'title_tesi', label: 'Title' - config.add_show_field 'format_sim', label: 'Format' - config.add_show_field 'creator_sim', label: 'Creator' - config.add_show_field 'language_sim', label: 'Language' + config.add_show_field 'format_ssim', label: 'Format' + config.add_show_field 'creator_ssim', label: 'Creator' + config.add_show_field 'language_ssim', label: 'Language' config.add_show_field 'date_ssi', label: 'Date' - config.add_show_field 'abstract_sim', label: 'Abstract' - config.add_show_field 'location_sim', label: 'Locations' + config.add_show_field 'abstract_ssim', label: 'Abstract' + config.add_show_field 'location_ssim', label: 'Locations' config.add_show_field 'time_period_sim', label: 'Time periods' - config.add_show_field 'contributor_sim', label: 'Contributors' - config.add_show_field 'publisher_sim', label: 'Publisher' - config.add_show_field 'genre_sim', label: 'Genre' - config.add_show_field 'publication_location_sim', label: 'Place of publication' - config.add_show_field 'terms_sim', label: 'Terms' + config.add_show_field 'contributor_ssim', label: 'Contributors' + config.add_show_field 'publisher_ssim', label: 'Publisher' + config.add_show_field 'genre_ssim', label: 'Genre' + config.add_show_field 'publication_location_ssim', label: 'Place of publication' + config.add_show_field 'terms_ssim', label: 'Terms' # "fielded" search configuration. Used by pulldown among other places. # For supported keys in hash, see rdoc for Blacklight::SearchFields diff --git a/app/models/media_object.rb b/app/models/media_object.rb index 23baebf6f6..94887b3462 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -234,7 +234,7 @@ def section_physical_descriptions def fill_in_solr_fields_that_need_master_files(solr_doc) solr_doc['section_id_ssim'] = ordered_master_file_ids - solr_doc["other_identifier_ssim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten + solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten solr_doc["date_digitized_ssim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" } solr_doc["section_label_tesim"] = section_labels solr_doc['section_physical_description_ssim'] = section_physical_descriptions @@ -260,12 +260,15 @@ def to_solr(include_child_fields: false) solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present? solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize) solr_doc['identifier_ssim'] = self.identifier.map(&:downcase) + solr_doc['note_ssm'] = self.note.collect { |n| n.to_json } + solr_doc['other_identifier_ssm'] = self.other_identifier.collect { |oi| oi.to_json } + solr_doc['related_item_url_ssm'] = self.related_item_url.collect { |r| r.to_json } if include_child_fields fill_in_solr_fields_that_need_master_files(solr_doc) elsif id.present? # avoid error in test suite # Fill in other identifier so these values aren't stripped from the solr doc while waiting for the background job mf_docs = ActiveFedora::SolrService.query("isPartOf_ssim:#{id}", rows: 1_000_000) - solr_doc["other_identifier_ssim"] += mf_docs.collect { |h| h['identifier_ssim'] }.flatten + solr_doc["other_identifier_sim"] += mf_docs.collect { |h| h['identifier_ssim'] }.flatten end #Add all searchable fields to the all_text_timv field @@ -285,9 +288,9 @@ def to_solr(include_child_fields: false) all_text_values << solr_doc["physical_description_ssim"] all_text_values << solr_doc["series_ssim"] all_text_values << solr_doc["date_ssim"] - all_text_values << solr_doc["note_ssim"] + all_text_values << solr_doc["note_ssm"] all_text_values << solr_doc["table_of_contents_ssim"] - all_text_values << solr_doc["other_identifier_ssim"] + all_text_values << solr_doc["other_identifier_ssm"] solr_doc["all_text_timv"] = all_text_values.flatten solr_doc.each_pair { |k,v| solr_doc[k] = v.is_a?(Array) ? v.select { |e| e =~ /\S/ } : v } end diff --git a/app/models/mods_behaviors.rb b/app/models/mods_behaviors.rb index d6faf5630b..dae72f040c 100644 --- a/app/models/mods_behaviors.rb +++ b/app/models/mods_behaviors.rb @@ -46,8 +46,7 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['genre_ssim'] = gather_terms(self.find_by_terms(:genre)) # solr_doc['physical_dtl_sim'] = gather_terms(self.find_by_terms(:format)) # solr_doc['contents_sim'] = gather_terms(self.find_by_terms(:parts_list)) - solr_doc['note_ssim'] = gather_terms(self.find_by_terms(:note)) - solr_doc['note_type_ssm'] = gather_attribute(self.find_by_terms(:note), 'type') + solr_doc['notes_sim'] = gather_terms(self.find_by_terms(:note)) solr_doc['table_of_contents_ssim'] = gather_terms(self.find_by_terms(:table_of_contents)) solr_doc['usage_ssim'] = gather_terms(self.find_by_terms(:usage)) # solr_doc['collection_sim'] = gather_terms(self.find_by_terms(:archival_collection)) @@ -74,12 +73,11 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['language_ssim'] = gather_terms(self.find_by_terms(:language_text)) solr_doc['language_code_ssim'] = gather_terms(self.find_by_terms(:language_code)) solr_doc['physical_description_ssim'] = gather_terms(self.find_by_terms(:physical_description)) - solr_doc['related_item_url_ssim'] = gather_terms(self.find_by_terms(:related_item_url)) - solr_doc['related_item_label_ssim'] = gather_terms(self.find_by_terms(:related_item_label)) + solr_doc['related_item_url_sim'] = gather_terms(self.find_by_terms(:related_item_url)) + solr_doc['related_item_label_sim'] = gather_terms(self.find_by_terms(:related_item_label)) solr_doc['terms_of_use_ssi'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text solr_doc['rights_statement_ssi'] = self.find_by_terms(:rights_statement).text - solr_doc['other_identifier_ssim'] = gather_terms(self.find_by_terms(:other_identifier)) - solr_doc['other_identifier_type_ssm'] = gather_attribute(self.find_by_terms(:other_identifier), 'type') + solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier)) # Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight solr_doc['date_ssi'] = self.find_by_terms(:date_issued).text diff --git a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index 8314add520..22a886405a 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -14,6 +14,7 @@ class SpeedyAF::Proxy::MediaObject < SpeedyAF::Base SINGULAR_FIELDS = [:title, :statement_of_responsibility, :date_created, :date_issued, :copyright_date, :abstract, :terms_of_use, :rights_statement] + HASH_FIELDS = [:note, :other_identifier, :related_item_url] # Override to handle section_id specially def initialize(solr_document, instance_defaults = {}) @@ -37,6 +38,10 @@ def initialize(solr_document, instance_defaults = {}) SINGULAR_FIELDS.each do |field_name| @attrs[field_name] = Array(@attrs[field_name]).first end + + HASH_FIELDS.each do |field_name| + @attrs[field_name].collect! { |hf| JSON.parse(hf, :symbolize_names => true) } + end # Convert empty strings to nil @attrs.transform_values! { |value| value == "" ? nil : value } end @@ -144,18 +149,6 @@ def language attrs[:language_code].present? ? attrs[:language_code].map { |code| { code: code, text: LanguageTerm.find(code).text } } : [] end - def note - attrs[:note].present? ? attrs[:note].map.with_index { |n, i| { note: n, type: attrs[:note_type][i] } } : [] - end - - def other_identifier - attrs[:other_identifier].present? ? attrs[:other_identifier].map.with_index { |oi, i| { id: oi, source: attrs[:other_identifier_type][i] } if attrs[:other_identifier_type][i].present? }.compact : [] - end - - def related_item_url - attrs[:related_item_url].present? ? attrs[:related_item_url].map.with_index { |url, i| { url: url, label: attrs[:related_item_label][i] } } : [] - end - protected # Overrides from SpeedyAF::Base diff --git a/config/initializers/presenter_config.rb b/config/initializers/presenter_config.rb index b849740229..698d78dc40 100644 --- a/config/initializers/presenter_config.rb +++ b/config/initializers/presenter_config.rb @@ -44,9 +44,7 @@ physical_description: [], related_item_url: [], note: [], - note_type: [], other_identifier: [], - other_identifier_type: [], rights_statement: nil, table_of_contents: [], bibliographic_id: nil, diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 03ef36a728..4ea262f4d2 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -172,7 +172,7 @@ describe "search fields" do let(:media_object) { FactoryBot.create(:fully_searchable_media_object) } - ["title_tesi", "creator_ssim", "contributor_ssim", "unit_ssim", "collection_ssim", "abstract_ssi", "publisher_ssim", "topical_subject_ssim", "geographic_subject_ssim", "temporal_subject_ssim", "genre_ssim", "physical_description_ssim", "language_ssim", "date_ssim", "note_ssim", "table_of_contents_ssim", "other_identifier_ssim", "series_ssim" ].each do |field| + ["title_tesi", "creator_ssim", "contributor_ssim", "unit_ssim", "collection_ssim", "abstract_ssi", "publisher_ssim", "topical_subject_ssim", "geographic_subject_ssim", "temporal_subject_ssim", "genre_ssim", "physical_description_ssim", "language_ssim", "date_ssim", "notes_sim", "table_of_contents_ssim", "other_identifier_sim", "series_ssim" ].each do |field| it "should find results based upon #{field}" do query = Array(media_object.to_solr[field]).first #split on ' ' and only search on the first word of a multiword field value diff --git a/spec/features/capybara_catalog_spec.rb b/spec/features/capybara_catalog_spec.rb new file mode 100644 index 0000000000..6a99a7ae67 --- /dev/null +++ b/spec/features/capybara_catalog_spec.rb @@ -0,0 +1,101 @@ +# Copyright 2011-2023, The Trustees of Indiana University and Northwestern +# University. Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# --- END LICENSE_HEADER BLOCK --- + +require 'rails_helper' + +describe 'item catalog' do + after { Warden.test_reset! } + + context 'unauthenticated user' do + let!(:media_object) { FactoryBot.create(:fully_searchable_media_object, :with_master_file, avalon_uploader: 'admin@example.edu') } + before :each do + media_object.read_groups += ['ldap_group'] + media_object.save! + end + + it 'verifies presence of all facet fields' do + visit '/catalog' + ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}_ssim-header") + end + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to_not have_selector(:id, "facet-#{field}-header") + end + end + end + + context 'user who is not a collection manager' do + let!(:media_object) { FactoryBot.create(:fully_searchable_media_object, :with_master_file, avalon_uploader: 'admin@example.edu') } + + before :each do + media_object.read_groups += ['ldap_group'] + media_object.save! + @user = FactoryBot.create(:user) + login_as @user, scope: :user + end + + it 'verifies presence of all facet fields' do + visit '/catalog' + ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}_ssim-header") + end + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to_not have_selector(:id, "facet-#{field}-header") + end + end + end + + context 'user with collection manager permissions' do + let(:manager) { FactoryBot.create(:manager) } + let!(:collection) { FactoryBot.create(:collection, managers: [manager.user_key]) } + let!(:media_object) { FactoryBot.create(:fully_searchable_media_object, :with_master_file, avalon_uploader: 'admin@example.edu', collection: collection) } + + before :each do + media_object.read_groups += ['ldap_group'] + media_object.save! + login_as manager, scope: :user + end + + it 'verifies presence of all facet fields' do + visit '/catalog' + ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}_ssim-header") + end + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}-header") + end + end + end + + context 'admin user' do + let!(:media_object) { FactoryBot.create(:fully_searchable_media_object, :with_master_file, avalon_uploader: 'admin@example.edu') } + + before :each do + media_object.read_groups += ['ldap_group'] + media_object.save! + @user = FactoryBot.create(:administrator) + login_as @user, scope: :user + end + + it 'verifies presence of all facet fields' do + visit '/catalog' + ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}_ssim-header") + end + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to have_selector(:id, "facet-#{field}-header") + end + end + end +end diff --git a/spec/models/media_object_spec.rb b/spec/models/media_object_spec.rb index 46d56155b5..c81022917a 100644 --- a/spec/models/media_object_spec.rb +++ b/spec/models/media_object_spec.rb @@ -599,14 +599,14 @@ media_object.descMetadata.add_other_identifier('12345678','lccn') media_object.descMetadata.add_other_identifier('8675309 testing','local') solr_doc = media_object.to_solr - expect(solr_doc['other_identifier_ssim']).to include('12345678','8675309 testing') - expect(solr_doc['other_identifier_ssim']).not_to include('123456788675309 testing') + expect(solr_doc['other_identifier_sim']).to include('12345678','8675309 testing') + expect(solr_doc['other_identifier_sim']).not_to include('123456788675309 testing') end it 'should index identifier for master files' do master_file = FactoryBot.create(:master_file, identifier: ['TestOtherID'], media_object: media_object) media_object.reload solr_doc = media_object.to_solr(include_child_fields: true) - expect(solr_doc['other_identifier_ssim']).to include('TestOtherID') + expect(solr_doc['other_identifier_sim']).to include('TestOtherID') end it 'should index labels for master files' do FactoryBot.create(:master_file, :with_structure, media_object: media_object, title: 'Test Label') From 4d85784dcc66baf176f8a7e8daeb4a8de5d8d6c8 Mon Sep 17 00:00:00 2001 From: Mason Ballengee Date: Thu, 9 Nov 2023 17:11:34 -0500 Subject: [PATCH 6/7] Clean up indexing terms --- app/controllers/catalog_controller.rb | 19 +++++++---------- app/models/media_object.rb | 4 ++-- app/models/mods_behaviors.rb | 6 +++--- .../speedy_af/proxy/media_object.rb | 1 - spec/controllers/catalog_controller_spec.rb | 6 +++--- spec/factories/master_files.rb | 7 ++++--- spec/features/capybara_catalog_spec.rb | 21 +++++++++++++------ 7 files changed, 35 insertions(+), 29 deletions(-) diff --git a/app/controllers/catalog_controller.rb b/app/controllers/catalog_controller.rb index e33d2707e9..740df43aa5 100644 --- a/app/controllers/catalog_controller.rb +++ b/app/controllers/catalog_controller.rb @@ -82,7 +82,7 @@ class CatalogController < ApplicationController # facet bar config.add_facet_field 'avalon_resource_type_ssim', label: 'Format', limit: 5, collapse: false config.add_facet_field 'creator_ssim', label: 'Main contributor', limit: 5 - config.add_facet_field 'date_ssim', label: 'Date', limit: 5 + config.add_facet_field 'date_sim', label: 'Date', limit: 5 config.add_facet_field 'genre_ssim', label: 'Genres', limit: 5 config.add_facet_field 'series_ssim', label: 'Series', limit: 5 config.add_facet_field 'collection_ssim', label: 'Collection', limit: 5 @@ -111,7 +111,7 @@ class CatalogController < ApplicationController # solr fields to be displayed in the index (search results) view # The ordering of the field names is the order of the display config.add_index_field 'title_tesi', label: 'Title', if: Proc.new {|context, _field_config, _document| context.request.format == :json } - config.add_index_field 'date_ssi', label: 'Date', helper_method: :combined_display_date + config.add_index_field 'date_issued_ssi', label: 'Date', helper_method: :combined_display_date config.add_index_field 'creator_ssim', label: 'Main contributors', helper_method: :contributor_index_display config.add_index_field 'abstract_ssi', label: 'Summary', helper_method: :description_index_display config.add_index_field 'duration_ssi', label: 'Duration', if: Proc.new {|context, _field_config, _document| context.request.format == :json } @@ -120,18 +120,15 @@ class CatalogController < ApplicationController # solr fields to be displayed in the show (single result) view # The ordering of the field names is the order of the display config.add_show_field 'title_tesi', label: 'Title' - config.add_show_field 'format_ssim', label: 'Format' - config.add_show_field 'creator_ssim', label: 'Creator' + config.add_show_field 'resource_type_ssim', label: 'Format' + config.add_show_field 'creator_ssim', label: 'Main Contributors' config.add_show_field 'language_ssim', label: 'Language' - config.add_show_field 'date_ssi', label: 'Date' + config.add_show_field 'date_issued_ssi', label: 'Date' config.add_show_field 'abstract_ssim', label: 'Abstract' config.add_show_field 'location_ssim', label: 'Locations' - config.add_show_field 'time_period_sim', label: 'Time periods' config.add_show_field 'contributor_ssim', label: 'Contributors' config.add_show_field 'publisher_ssim', label: 'Publisher' config.add_show_field 'genre_ssim', label: 'Genre' - config.add_show_field 'publication_location_ssim', label: 'Place of publication' - config.add_show_field 'terms_ssim', label: 'Terms' # "fielded" search configuration. Used by pulldown among other places. # For supported keys in hash, see rdoc for Blacklight::SearchFields @@ -191,10 +188,10 @@ class CatalogController < ApplicationController # label in pulldown is followed by the name of the SOLR field to sort by and # whether the sort is ascending or descending (it must be asc or desc # except in the relevancy case). - config.add_sort_field 'score desc, title_ssort asc, date_ssi desc', label: 'Relevance' - config.add_sort_field 'date_ssi desc, title_ssort asc', label: 'Date' + config.add_sort_field 'score desc, title_ssort asc, date_issued_ssi desc', label: 'Relevance' + config.add_sort_field 'date_issued_ssi desc, title_ssort asc', label: 'Date' config.add_sort_field 'creator_ssort asc, title_ssort asc', label: 'Main contributor' - config.add_sort_field 'title_ssort asc, date_ssi desc', label: 'Title' + config.add_sort_field 'title_ssort asc, date_issued_ssi desc', label: 'Title' config.add_sort_field 'timestamp desc', label: 'Recently Updated', if: false # If there are more than this many search results, no spelling ("did you diff --git a/app/models/media_object.rb b/app/models/media_object.rb index 94887b3462..c84f83a331 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -287,8 +287,8 @@ def to_solr(include_child_fields: false) all_text_values << solr_doc["language_ssim"] all_text_values << solr_doc["physical_description_ssim"] all_text_values << solr_doc["series_ssim"] - all_text_values << solr_doc["date_ssim"] - all_text_values << solr_doc["note_ssm"] + all_text_values << solr_doc["date_sim"] + all_text_values << solr_doc["notes_sim"] all_text_values << solr_doc["table_of_contents_ssim"] all_text_values << solr_doc["other_identifier_ssm"] solr_doc["all_text_timv"] = all_text_values.flatten diff --git a/app/models/mods_behaviors.rb b/app/models/mods_behaviors.rb index dae72f040c..8aac333eaa 100644 --- a/app/models/mods_behaviors.rb +++ b/app/models/mods_behaviors.rb @@ -80,11 +80,11 @@ def to_solr(solr_doc = Hash.new, opts = {}) solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier)) # Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight - solr_doc['date_ssi'] = self.find_by_terms(:date_issued).text + solr_doc['date_issued_ssi'] = self.find_by_terms(:date_issued).text solr_doc['date_created_ssi'] = self.find_by_terms(:date_created).text # Put both publication date and creation date into the date facet - solr_doc['date_ssim'] = gather_years(solr_doc['date_ssi']) - solr_doc['date_ssim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present? + solr_doc['date_sim'] = gather_years(solr_doc['date_issued_ssi']) + solr_doc['date_sim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present? # For full text, we stuff it into the mods_tesim field which is already configured for Mods doucments solr_doc['mods_tesim'] = self.ng_xml.xpath('//text()').collect { |t| t.text } diff --git a/app/presenters/speedy_af/proxy/media_object.rb b/app/presenters/speedy_af/proxy/media_object.rb index 22a886405a..13189f1d23 100644 --- a/app/presenters/speedy_af/proxy/media_object.rb +++ b/app/presenters/speedy_af/proxy/media_object.rb @@ -27,7 +27,6 @@ def initialize(solr_document, instance_defaults = {}) end # Handle this case here until a better fix can be found for multiple solr fields which don't have a model property @attrs[:section_id] = solr_document["section_id_ssim"] - @attrs[:date_issued] = solr_document["date_ssi"] @attrs[:hidden?] = solr_document["hidden_bsi"] @attrs[:read_groups] = solr_document["read_access_group_ssim"] || [] @attrs[:edit_groups] = solr_document["edit_access_group_ssim"] || [] diff --git a/spec/controllers/catalog_controller_spec.rb b/spec/controllers/catalog_controller_spec.rb index 4ea262f4d2..939f4701ad 100644 --- a/spec/controllers/catalog_controller_spec.rb +++ b/spec/controllers/catalog_controller_spec.rb @@ -172,7 +172,7 @@ describe "search fields" do let(:media_object) { FactoryBot.create(:fully_searchable_media_object) } - ["title_tesi", "creator_ssim", "contributor_ssim", "unit_ssim", "collection_ssim", "abstract_ssi", "publisher_ssim", "topical_subject_ssim", "geographic_subject_ssim", "temporal_subject_ssim", "genre_ssim", "physical_description_ssim", "language_ssim", "date_ssim", "notes_sim", "table_of_contents_ssim", "other_identifier_sim", "series_ssim" ].each do |field| + ["title_tesi", "creator_ssim", "contributor_ssim", "unit_ssim", "collection_ssim", "abstract_ssi", "publisher_ssim", "topical_subject_ssim", "geographic_subject_ssim", "temporal_subject_ssim", "genre_ssim", "physical_description_ssim", "language_ssim", "date_sim", "notes_sim", "table_of_contents_ssim", "other_identifier_sim", "series_ssim" ].each do |field| it "should find results based upon #{field}" do query = Array(media_object.to_solr[field]).first #split on ' ' and only search on the first word of a multiword field value @@ -219,11 +219,11 @@ let!(:m3) { FactoryBot.create(:published_media_object, title: 'Doo', date_issued: '1980', creator: ['Wilma'], visibility: 'public') } it "should sort correctly by title" do - get :index, params: { :sort => 'title_ssort asc, date_ssi desc' } + get :index, params: { :sort => 'title_ssort asc, date_issued_ssi desc' } expect(assigns(:response).documents.map(&:id)).to eq [m2.id, m3.id, m1.id] end it "should sort correctly by date" do - get :index, params: { :sort => 'date_ssi desc, title_ssort asc' } + get :index, params: { :sort => 'date_issued_ssi desc, title_ssort asc' } expect(assigns(:response).documents.map(&:id)).to eq [m3.id, m2.id, m1.id] end it "should sort correctly by creator" do diff --git a/spec/factories/master_files.rb b/spec/factories/master_files.rb index 2d036eb24b..e99849d8d5 100644 --- a/spec/factories/master_files.rb +++ b/spec/factories/master_files.rb @@ -1,11 +1,11 @@ # Copyright 2011-2023, The Trustees of Indiana University and Northwestern # University. Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -# +# # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software distributed # under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR # CONDITIONS OF ANY KIND, either express or implied. See the License for the @@ -24,6 +24,7 @@ original_frame_size { '1024X768' } width { '1024' } height { '768' } + date_digitized { Time.now.utc.iso8601 } sequence(:workflow_id) diff --git a/spec/features/capybara_catalog_spec.rb b/spec/features/capybara_catalog_spec.rb index 6a99a7ae67..f7a3825d53 100644 --- a/spec/features/capybara_catalog_spec.rb +++ b/spec/features/capybara_catalog_spec.rb @@ -22,13 +22,16 @@ before :each do media_object.read_groups += ['ldap_group'] media_object.save! + # Perform indexing job so master file specific fields are added to the Media Object solr_doc + MediaObjectIndexingJob.perform_now(media_object.id) end it 'verifies presence of all facet fields' do visit '/catalog' - ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + ['avalon_resource_type', 'creator', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| expect(page).to have_selector(:id, "facet-#{field}_ssim-header") end + expect(page).to have_selector(:id, "facet-date_sim-header") ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| expect(page).to_not have_selector(:id, "facet-#{field}-header") end @@ -41,15 +44,17 @@ before :each do media_object.read_groups += ['ldap_group'] media_object.save! + MediaObjectIndexingJob.perform_now(media_object.id) @user = FactoryBot.create(:user) login_as @user, scope: :user end it 'verifies presence of all facet fields' do visit '/catalog' - ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + ['avalon_resource_type', 'creator', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| expect(page).to have_selector(:id, "facet-#{field}_ssim-header") end + expect(page).to have_selector(:id, "facet-date_sim-header") ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| expect(page).to_not have_selector(:id, "facet-#{field}-header") end @@ -64,15 +69,17 @@ before :each do media_object.read_groups += ['ldap_group'] media_object.save! + MediaObjectIndexingJob.perform_now(media_object.id) login_as manager, scope: :user end it 'verifies presence of all facet fields' do visit '/catalog' - ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + ['avalon_resource_type', 'creator', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| expect(page).to have_selector(:id, "facet-#{field}_ssim-header") end - ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to have_selector(:id, "facet-date_sim-header") + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| expect(page).to have_selector(:id, "facet-#{field}-header") end end @@ -84,16 +91,18 @@ before :each do media_object.read_groups += ['ldap_group'] media_object.save! + MediaObjectIndexingJob.perform_now(media_object.id) @user = FactoryBot.create(:administrator) login_as @user, scope: :user end it 'verifies presence of all facet fields' do visit '/catalog' - ['avalon_resource_type', 'creator', 'date', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| + ['avalon_resource_type', 'creator', 'genre', 'series', 'collection', 'unit', 'language'].each do |field| expect(page).to have_selector(:id, "facet-#{field}_ssim-header") end - ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_ingested_ssim'].each do |field| + expect(page).to have_selector(:id, "facet-date_sim-header") + ['workflow_published_sim', 'avalon_uploader_ssi', 'read_access_group_ssim', 'read_access_virtual_group_ssim', 'date_digitized_ssim', 'date_ingested_ssim'].each do |field| expect(page).to have_selector(:id, "facet-#{field}-header") end end From 6e632b15dad1309d4681aa6b299bc3dbd19bd0cd Mon Sep 17 00:00:00 2001 From: Mason Ballengee <68433277+masaball@users.noreply.github.com> Date: Mon, 13 Nov 2023 09:24:38 -0500 Subject: [PATCH 7/7] Use other_identifier_sim in all_text_values `other_identifier_ssm` is an array of JSON hashes, while `other_identifier_sim` is just the identifier values. We should use these values in the `all_text_value` field to match the original implementation and avoid pitfalls related to feeding JSON into the searchable field. Co-authored-by: Chris Colvard --- app/models/media_object.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/media_object.rb b/app/models/media_object.rb index c84f83a331..66def63539 100644 --- a/app/models/media_object.rb +++ b/app/models/media_object.rb @@ -290,7 +290,7 @@ def to_solr(include_child_fields: false) all_text_values << solr_doc["date_sim"] all_text_values << solr_doc["notes_sim"] all_text_values << solr_doc["table_of_contents_ssim"] - all_text_values << solr_doc["other_identifier_ssm"] + all_text_values << solr_doc["other_identifier_sim"] solr_doc["all_text_timv"] = all_text_values.flatten solr_doc.each_pair { |k,v| solr_doc[k] = v.is_a?(Array) ? v.select { |e| e =~ /\S/ } : v } end