Skip to content

Commit

Permalink
Merge pull request #5425 from avalonmediasystem/speedyaf_missing_data
Browse files Browse the repository at this point in the history
Adjust indexing so all MO metadata values get added to proxy
  • Loading branch information
cjcolvar authored Nov 15, 2023
2 parents 883f87e + 032310a commit 55974ff
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 78 deletions.
39 changes: 18 additions & 21 deletions app/controllers/catalog_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ class CatalogController < ApplicationController
config.add_facet_field 'avalon_resource_type_ssim', label: 'Format', limit: 5, collapse: false
config.add_facet_field 'creator_ssim', label: 'Main contributor', limit: 5
config.add_facet_field 'date_sim', label: 'Date', limit: 5
config.add_facet_field 'genre_sim', label: 'Genres', limit: 5
config.add_facet_field 'genre_ssim', label: 'Genres', limit: 5
config.add_facet_field 'series_ssim', label: 'Series', limit: 5
config.add_facet_field 'collection_ssim', label: 'Collection', limit: 5
config.add_facet_field 'unit_ssim', label: 'Unit', limit: 5
config.add_facet_field 'language_sim', label: 'Language', limit: 5
config.add_facet_field 'language_ssim', label: 'Language', limit: 5
# Hide these facets if not a Collection Manager
config.add_facet_field 'workflow_published_sim', label: 'Published', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
config.add_facet_field 'avalon_uploader_ssi', label: 'Created by', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
Expand All @@ -97,8 +97,8 @@ class CatalogController < ApplicationController
private: { label: "Private", fq: "has_model_ssim:MediaObject AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_PUBLIC} AND NOT read_access_group_ssim:#{Hydra::AccessControls::AccessRight::PERMISSION_TEXT_VALUE_AUTHENTICATED}" }
}
config.add_facet_field 'read_access_virtual_group_ssim', label: 'External Group', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow", helper_method: :vgroup_display
config.add_facet_field 'date_digitized_sim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy'
config.add_facet_field 'date_ingested_sim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"
config.add_facet_field 'date_digitized_ssim', label: 'Date Digitized', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"#, partial: 'blacklight/hierarchy/facet_hierarchy'
config.add_facet_field 'date_ingested_ssim', label: 'Date Ingested', limit: 5, if: Proc.new {|context, config, opts| Ability.new(context.current_user, context.user_session).can? :create, MediaObject}, group: "workflow"

# Have BL send all facet field names to Solr, which has been the default
# previously. Simply remove these lines if you'd rather use Solr request
Expand All @@ -111,27 +111,24 @@ class CatalogController < ApplicationController
# solr fields to be displayed in the index (search results) view
# The ordering of the field names is the order of the display
config.add_index_field 'title_tesi', label: 'Title', if: Proc.new {|context, _field_config, _document| context.request.format == :json }
config.add_index_field 'date_ssi', label: 'Date', helper_method: :combined_display_date
config.add_index_field 'date_issued_ssi', label: 'Date', helper_method: :combined_display_date
config.add_index_field 'creator_ssim', label: 'Main contributors', helper_method: :contributor_index_display
config.add_index_field 'summary_ssi', label: 'Summary', helper_method: :description_index_display
config.add_index_field 'abstract_ssi', label: 'Summary', helper_method: :description_index_display
config.add_index_field 'duration_ssi', label: 'Duration', if: Proc.new {|context, _field_config, _document| context.request.format == :json }
config.add_index_field 'section_id_ssim', label: 'Sections', if: Proc.new {|context, _field_config, _document| context.request.format == :json }, helper_method: :section_id_json_index_display

# solr fields to be displayed in the show (single result) view
# The ordering of the field names is the order of the display
config.add_show_field 'title_tesi', label: 'Title'
config.add_show_field 'format_sim', label: 'Format'
config.add_show_field 'creator_sim', label: 'Creator'
config.add_show_field 'language_sim', label: 'Language'
config.add_show_field 'date_ssi', label: 'Date'
config.add_show_field 'abstract_sim', label: 'Abstract'
config.add_show_field 'location_sim', label: 'Locations'
config.add_show_field 'time_period_sim', label: 'Time periods'
config.add_show_field 'contributor_sim', label: 'Contributors'
config.add_show_field 'publisher_sim', label: 'Publisher'
config.add_show_field 'genre_sim', label: 'Genre'
config.add_show_field 'publication_location_sim', label: 'Place of publication'
config.add_show_field 'terms_sim', label: 'Terms'
config.add_show_field 'resource_type_ssim', label: 'Format'
config.add_show_field 'creator_ssim', label: 'Main Contributors'
config.add_show_field 'language_ssim', label: 'Language'
config.add_show_field 'date_issued_ssi', label: 'Date'
config.add_show_field 'abstract_ssim', label: 'Abstract'
config.add_show_field 'location_ssim', label: 'Locations'
config.add_show_field 'contributor_ssim', label: 'Contributors'
config.add_show_field 'publisher_ssim', label: 'Publisher'
config.add_show_field 'genre_ssim', label: 'Genre'

# "fielded" search configuration. Used by pulldown among other places.
# For supported keys in hash, see rdoc for Blacklight::SearchFields
Expand Down Expand Up @@ -191,10 +188,10 @@ class CatalogController < ApplicationController
# label in pulldown is followed by the name of the SOLR field to sort by and
# whether the sort is ascending or descending (it must be asc or desc
# except in the relevancy case).
config.add_sort_field 'score desc, title_ssort asc, date_ssi desc', label: 'Relevance'
config.add_sort_field 'date_ssi desc, title_ssort asc', label: 'Date'
config.add_sort_field 'score desc, title_ssort asc, date_issued_ssi desc', label: 'Relevance'
config.add_sort_field 'date_issued_ssi desc, title_ssort asc', label: 'Date'
config.add_sort_field 'creator_ssort asc, title_ssort asc', label: 'Main contributor'
config.add_sort_field 'title_ssort asc, date_ssi desc', label: 'Title'
config.add_sort_field 'title_ssort asc, date_issued_ssi desc', label: 'Title'
config.add_sort_field 'timestamp desc', label: 'Recently Updated', if: false

# If there are more than this many search results, no spelling ("did you
Expand Down
4 changes: 3 additions & 1 deletion app/models/iiif_manifest_presenter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,12 @@ def display_unit(media_object)
end

# Returns the unique display names of the media object's languages.
#
# @param media_object [#language] object whose +language+ returns a collection
#   of hashes carrying a :text entry
# @return [Array, nil] the distinct :text values in first-seen order, or nil
#   when the object has no languages
def display_language(media_object)
  # Read the collection once: +language+ may be computed on every call (the
  # SpeedyAF proxy builds it by looking up each language code), so evaluating
  # it for both the guard and the mapping would repeat that work.
  languages = media_object.language
  return nil unless languages.present?

  languages.map { |language| language[:text] }.uniq
end

# Renders each related item of the media object as an HTML anchor string.
#
# @param media_object [#related_item_url] object whose +related_item_url+
#   returns a collection of hashes with :url and :label entries
# @return [Array<String>, nil] one "<a href=...>" string per related item, or
#   nil when there are no related items
def display_related_item(media_object)
  # NOTE(review): :url and :label are interpolated without HTML escaping —
  # confirm the values are trusted or escaped before they reach this point.
  if media_object.related_item_url.present?
    media_object.related_item_url.map do |item|
      "<a href='#{item[:url]}'>#{item[:label]}</a>"
    end
  end
end

Expand Down Expand Up @@ -180,7 +182,7 @@ def iiif_metadata_fields
metadata_field('Contributor', media_object.contributor),
metadata_field('Publisher', media_object.publisher),
metadata_field('Genre', media_object.genre),
metadata_field('Subject', media_object.subject),
metadata_field('Subject', media_object.topical_subject),
metadata_field('Time period', media_object.temporal_subject),
metadata_field('Location', media_object.geographic_subject),
metadata_field('Collection', display_collection(media_object)),
Expand Down
29 changes: 16 additions & 13 deletions app/models/media_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,10 @@ def section_physical_descriptions
# Adds to solr_doc the Solr fields whose values are read from this object's
# master files (section ids, identifiers, digitization dates, section labels,
# physical descriptions and comments). Mutates solr_doc in place.
#
# NOTE(review): this span comes from a rendered diff; each "_sim" assignment
# that is immediately followed by an identical "_ssim" assignment looks like
# the removed/added pair of one line — confirm against the committed file.
def fill_in_solr_fields_that_need_master_files(solr_doc)
solr_doc['section_id_ssim'] = ordered_master_file_ids
# Appends to an existing value; presumably "other_identifier_sim" was already
# set to an Array earlier in indexing — TODO confirm, since += on nil raises.
solr_doc["other_identifier_sim"] += master_files.collect {|mf| mf.identifier.to_a }.flatten
# Nil dates are dropped; the rest are parsed and formatted with "%F" (YYYY-MM-DD).
solr_doc["date_digitized_sim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["date_digitized_ssim"] = master_files.collect {|mf| mf.date_digitized }.compact.map {|t| Time.parse(t).strftime "%F" }
solr_doc["section_label_tesim"] = section_labels
solr_doc['section_physical_description_ssim'] = section_physical_descriptions
solr_doc['all_comments_sim'] = all_comments
solr_doc['all_comments_ssim'] = all_comments
end

# Enqueue background job to do a full indexing including more costly fields that read from children
Expand All @@ -257,9 +257,12 @@ def to_solr(include_child_fields: false)
solr_doc[Hydra.config.permissions.read.group] += solr_doc['read_access_ip_group_ssim']
solr_doc["title_ssort"] = self.title
solr_doc["creator_ssort"] = Array(self.creator).join(', ')
solr_doc["date_ingested_sim"] = self.create_date.strftime "%F" if self.create_date.present?
solr_doc["date_ingested_ssim"] = self.create_date.strftime "%F" if self.create_date.present?
solr_doc['avalon_resource_type_ssim'] = self.avalon_resource_type.map(&:titleize)
solr_doc['identifier_ssim'] = self.identifier.map(&:downcase)
solr_doc['note_ssm'] = self.note.collect { |n| n.to_json }
solr_doc['other_identifier_ssm'] = self.other_identifier.collect { |oi| oi.to_json }
solr_doc['related_item_url_ssm'] = self.related_item_url.collect { |r| r.to_json }
if include_child_fields
fill_in_solr_fields_that_need_master_files(solr_doc)
elsif id.present? # avoid error in test suite
Expand All @@ -272,21 +275,21 @@ def to_solr(include_child_fields: false)
all_text_values = []
all_text_values << solr_doc["title_tesi"]
all_text_values << solr_doc["creator_ssim"]
all_text_values << solr_doc["contributor_sim"]
all_text_values << solr_doc["contributor_ssim"]
all_text_values << solr_doc["unit_ssim"]
all_text_values << solr_doc["collection_ssim"]
all_text_values << solr_doc["summary_ssi"]
all_text_values << solr_doc["publisher_sim"]
all_text_values << solr_doc["subject_topic_sim"]
all_text_values << solr_doc["subject_geographic_sim"]
all_text_values << solr_doc["subject_temporal_sim"]
all_text_values << solr_doc["genre_sim"]
all_text_values << solr_doc["language_sim"]
all_text_values << solr_doc["physical_description_sim"]
all_text_values << solr_doc["abstract_ssi"]
all_text_values << solr_doc["publisher_ssim"]
all_text_values << solr_doc["topical_subject_ssim"]
all_text_values << solr_doc["geographic_subject_ssim"]
all_text_values << solr_doc["temporal_subject_ssim"]
all_text_values << solr_doc["genre_ssim"]
all_text_values << solr_doc["language_ssim"]
all_text_values << solr_doc["physical_description_ssim"]
all_text_values << solr_doc["series_ssim"]
all_text_values << solr_doc["date_sim"]
all_text_values << solr_doc["notes_sim"]
all_text_values << solr_doc["table_of_contents_sim"]
all_text_values << solr_doc["table_of_contents_ssim"]
all_text_values << solr_doc["other_identifier_sim"]
solr_doc["all_text_timv"] = all_text_values.flatten
solr_doc.each_pair { |k,v| solr_doc[k] = v.is_a?(Array) ? v.select { |e| e =~ /\S/ } : v }
Expand Down
60 changes: 32 additions & 28 deletions app/models/mods_behaviors.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# Copyright 2011-2023, The Trustees of Indiana University and Northwestern
# University. Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
#
#
# You may obtain a copy of the License at
#
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
Expand Down Expand Up @@ -39,51 +39,51 @@ def to_solr(solr_doc = Hash.new, opts = {})
solr_doc['creator_ssim'] = gather_terms(self.find_by_terms(:creator))
# solr_doc['creator_ssi'] = self.find_by_terms(:creator).text
# Individual fields
solr_doc['summary_ssi'] = self.find_by_terms(:abstract).text
solr_doc['publisher_sim'] = gather_terms(self.find_by_terms(:publisher))
solr_doc['contributor_sim'] = gather_terms(self.find_by_terms(:contributor))
solr_doc['subject_sim'] = gather_terms(self.find_by_terms(:subject))
solr_doc['genre_sim'] = gather_terms(self.find_by_terms(:genre))
solr_doc['abstract_ssi'] = self.find_by_terms(:abstract).text
solr_doc['publisher_ssim'] = gather_terms(self.find_by_terms(:publisher))
solr_doc['contributor_ssim'] = gather_terms(self.find_by_terms(:contributor))
solr_doc['subject_ssim'] = gather_terms(self.find_by_terms(:subject))
solr_doc['genre_ssim'] = gather_terms(self.find_by_terms(:genre))
# solr_doc['physical_dtl_sim'] = gather_terms(self.find_by_terms(:format))
# solr_doc['contents_sim'] = gather_terms(self.find_by_terms(:parts_list))
solr_doc['notes_sim'] = gather_terms(self.find_by_terms(:note))
solr_doc['table_of_contents_sim'] = gather_terms(self.find_by_terms(:table_of_contents))
solr_doc['access_sim'] = gather_terms(self.find_by_terms(:usage))
solr_doc['table_of_contents_ssim'] = gather_terms(self.find_by_terms(:table_of_contents))
solr_doc['usage_ssim'] = gather_terms(self.find_by_terms(:usage))
# solr_doc['collection_sim'] = gather_terms(self.find_by_terms(:archival_collection))
solr_doc['series_ssim'] = gather_terms(self.find_by_terms(:series))
#filter formats based upon whitelist
solr_doc['format_sim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize)
solr_doc['location_sim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['resource_type_ssim'] = (gather_terms(self.find_by_terms(:resource_type)) & ['moving image', 'sound recording' ]).map(&:titleize)
solr_doc['location_ssim'] = gather_terms(self.find_by_terms(:geographic_subject))

# Blacklight facets - these are the same facet fields used in our Blacklight app
# for consistency and so they'll show up when we export records from Hydra into BL:
solr_doc['material_sim'] = "Digital"
solr_doc['subject_topic_sim'] = gather_terms(self.find_by_terms(:topical_subject))
solr_doc['subject_geographic_sim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['subject_temporal_sim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['subject_occupation_sim'] = gather_terms(self.find_by_terms(:occupation_subject))
solr_doc['subject_person_sim'] = gather_terms(self.find_by_terms(:person_subject))
solr_doc['subject_corporate_sim'] = gather_terms(self.find_by_terms(:corporate_subject))
solr_doc['subject_family_sim'] = gather_terms(self.find_by_terms(:family_subject))
solr_doc['subject_title_sim'] = gather_terms(self.find_by_terms(:title_subject))
solr_doc['time_sim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['material_ssim'] = "Digital"
solr_doc['topical_subject_ssim'] = gather_terms(self.find_by_terms(:topical_subject))
solr_doc['geographic_subject_ssim'] = gather_terms(self.find_by_terms(:geographic_subject))
solr_doc['temporal_subject_ssim'] = gather_terms(self.find_by_terms(:temporal_subject))
solr_doc['occupation_subject_ssim'] = gather_terms(self.find_by_terms(:occupation_subject))
solr_doc['person_subject_ssim'] = gather_terms(self.find_by_terms(:person_subject))
solr_doc['corporate_subject_ssim'] = gather_terms(self.find_by_terms(:corporate_subject))
solr_doc['family_subject_ssim'] = gather_terms(self.find_by_terms(:family_subject))
solr_doc['title_subject_ssim'] = gather_terms(self.find_by_terms(:title_subject))
solr_doc['time_ssim'] = gather_terms(self.find_by_terms(:temporal_subject))

# TODO: map PBcore's three-letter language codes to full language names
# Right now, everything's English.
solr_doc['language_sim'] = gather_terms(self.find_by_terms(:language_text))
solr_doc['language_code_sim'] = gather_terms(self.find_by_terms(:language_code))
solr_doc['physical_description_sim'] = gather_terms(self.find_by_terms(:physical_description))
solr_doc['language_ssim'] = gather_terms(self.find_by_terms(:language_text))
solr_doc['language_code_ssim'] = gather_terms(self.find_by_terms(:language_code))
solr_doc['physical_description_ssim'] = gather_terms(self.find_by_terms(:physical_description))
solr_doc['related_item_url_sim'] = gather_terms(self.find_by_terms(:related_item_url))
solr_doc['related_item_label_sim'] = gather_terms(self.find_by_terms(:related_item_label))
solr_doc['terms_of_use_si'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text
solr_doc['terms_of_use_ssi'] = (self.find_by_terms(:terms_of_use) - self.find_by_terms(:rights_statement)).text
solr_doc['rights_statement_ssi'] = self.find_by_terms(:rights_statement).text
solr_doc['other_identifier_sim'] = gather_terms(self.find_by_terms(:other_identifier))

# Extract 4-digit year for creation date facet in Hydra and pub_date facet in Blacklight
solr_doc['date_ssi'] = self.find_by_terms(:date_issued).text
solr_doc['date_issued_ssi'] = self.find_by_terms(:date_issued).text
solr_doc['date_created_ssi'] = self.find_by_terms(:date_created).text
# Put both publication date and creation date into the date facet
solr_doc['date_sim'] = gather_years(solr_doc['date_ssi'])
solr_doc['date_sim'] = gather_years(solr_doc['date_issued_ssi'])
solr_doc['date_sim'] += gather_years(solr_doc['date_created_ssi']) if solr_doc['date_created_ssi'].present?

# For full text, we stuff it into the mods_tesim field which is already configured for Mods documents
Expand Down Expand Up @@ -185,6 +185,10 @@ def gather_terms(terms)
terms.collect { |r| r.text }.compact.uniq
end

def gather_attribute(terms, attribute)
terms.collect { |t| t.attribute(attribute).value }
end

def gather_years(date)
parsed = Date.edtf(date)
return Array.new if parsed.nil?
Expand Down
10 changes: 9 additions & 1 deletion app/presenters/speedy_af/proxy/media_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

class SpeedyAF::Proxy::MediaObject < SpeedyAF::Base
SINGULAR_FIELDS = [:title, :statement_of_responsibility, :date_created, :date_issued, :copyright_date, :abstract, :terms_of_use, :rights_statement]
HASH_FIELDS = [:note, :other_identifier, :related_item_url]

# Override to handle section_id specially
def initialize(solr_document, instance_defaults = {})
Expand All @@ -26,7 +27,6 @@ def initialize(solr_document, instance_defaults = {})
end
# Handle this case here until a better fix can be found for multiple solr fields which don't have a model property
@attrs[:section_id] = solr_document["section_id_ssim"]
@attrs[:date_issued] = solr_document["date_ssi"]
@attrs[:hidden?] = solr_document["hidden_bsi"]
@attrs[:read_groups] = solr_document["read_access_group_ssim"] || []
@attrs[:edit_groups] = solr_document["edit_access_group_ssim"] || []
Expand All @@ -37,6 +37,10 @@ def initialize(solr_document, instance_defaults = {})
SINGULAR_FIELDS.each do |field_name|
@attrs[field_name] = Array(@attrs[field_name]).first
end

HASH_FIELDS.each do |field_name|
@attrs[field_name].collect! { |hf| JSON.parse(hf, :symbolize_names => true) }
end
# Convert empty strings to nil
@attrs.transform_values! { |value| value == "" ? nil : value }
end
Expand Down Expand Up @@ -140,6 +144,10 @@ def governing_policies
@governing_policies ||= Array(attrs[:isGovernedBy]).collect { |id| SpeedyAF::Base.find(id) }
end

# Builds the language list from the stored language codes.
#
# @return [Array<Hash>] one { code:, text: } hash per entry of
#   attrs[:language_code], where text comes from LanguageTerm.find(code).text;
#   an empty array when no codes are present
def language
  codes = attrs[:language_code]
  return [] unless codes.present?

  codes.map do |code|
    { code: code, text: LanguageTerm.find(code).text }
  end
end

# Lists the ids of the sections that have supplemental files for a tag.
#
# @param tag [String] passed through to +supplemental_files+; defaults to '*'
# @return [Array] the entries of ordered_master_file_ids, in their original
#   order, whose proxy master file reports supplemental files for +tag+
def sections_with_files(tag: '*')
  ordered_master_file_ids.select do |section_id|
    section = SpeedyAF::Proxy::MasterFile.find(section_id)
    section.supplemental_files(tag: tag).present?
  end
end
Expand Down
Loading

0 comments on commit 55974ff

Please sign in to comment.