Skip to content

Commit

Permalink
Merge pull request #5900 from avalonmediasystem/deindex_transcripts
Browse files Browse the repository at this point in the history
Remove transcripts from indexes
  • Loading branch information
masaball authored Jul 8, 2024
2 parents 0fad664 + 0c0be4d commit d36a347
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 1 deletion.
2 changes: 2 additions & 0 deletions app/controllers/supplemental_files_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def update
@supplemental_file.attach_file(attachment) if attachment

raise Avalon::SaveError, @supplemental_file.errors.full_messages unless @supplemental_file.save
# Updates parent object's solr document
@object.update_index

flash[:success] = "Supplemental file successfully updated."
respond_to do |format|
Expand Down
15 changes: 14 additions & 1 deletion app/models/concerns/supplemental_file_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,20 @@ module SupplementalFileBehavior
# @return [SupplementalFile]
def supplemental_files(tag: '*')
return [] if supplemental_files_json.blank?
files = JSON.parse(supplemental_files_json).collect { |file_gid| GlobalID::Locator.locate(file_gid) }
# If the supplemental_files_json becomes out of sync with the
# database after a delete, this check could fail. Have not
# encountered in a live environment but came up in automated
# testing. Adding a rescue on fail to locate allows us to skip
# these out of sync files.
files = JSON.parse(supplemental_files_json).collect do |file_gid|
begin
GlobalID::Locator.locate(file_gid)
rescue ActiveRecord::RecordNotFound
nil
end
end.compact
return [] if files.blank?

case tag
when '*'
files
Expand Down
5 changes: 5 additions & 0 deletions app/models/supplemental_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class SupplementalFile < ApplicationRecord
# See https://github.com/rails/rails/issues/37304
after_create_commit :index_file, prepend: true
after_update_commit :update_index, prepend: true
after_destroy_commit :remove_from_index

def attach_file(new_file)
file.attach(new_file)
Expand Down Expand Up @@ -103,6 +104,10 @@ def update_index
end
alias index_file update_index

def remove_from_index
ActiveFedora::SolrService.delete(to_global_id)
end

# Creates a solr document hash for the {#object}
# @return [Hash] the solr document
def to_solr
Expand Down
32 changes: 32 additions & 0 deletions spec/models/supplemental_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,38 @@
after_doc = ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(transcript.to_global_id.to_s)}").first
expect(after_doc["transcript_tsim"].first).to eq("00:00:01.200 --> 00:00:21.000 [music]")
end

context 'caption as transcript' do
let(:transcript) { FactoryBot.create(:supplemental_file, :with_caption_file, tags: ['caption', 'transcript']) }

it 'removes the transcript_tsim content when transcript tag is removed' do
before_doc = ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(transcript.to_global_id.to_s)}").first
expect(before_doc["transcript_tsim"].first).to eq "00:00:03.500 --> 00:00:05.000 Example captions"
transcript.tags = ['caption']
transcript.save
after_doc = ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(transcript.to_global_id.to_s)}").first
expect(after_doc["transcript_tsim"]).to be_nil
end
end
end
end

describe "#remove_from_index" do
let(:transcript) { FactoryBot.create(:supplemental_file, :with_transcript_file, :with_transcript_tag) }

context 'on delete' do
it 'triggers callback' do
expect(transcript).to receive(:remove_from_index)
transcript.destroy
end

it 'removes the transcript from the index' do
before_doc = ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(transcript.to_global_id.to_s)}").first
expect(before_doc['transcript_tsim']).to eq ["00:00:03.500 --> 00:00:05.000 Example captions"]
transcript.destroy
after_doc = ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(transcript.to_global_id.to_s)}").first
expect(after_doc).to be_nil
end
end
end

Expand Down
45 changes: 45 additions & 0 deletions spec/support/supplemental_files_controller_examples.rb
Original file line number Diff line number Diff line change
Expand Up @@ -485,11 +485,32 @@
end
context "removing transcript designation" do
let(:supplemental_file) { FactoryBot.create(:supplemental_file, :with_transcript_file, tags: ['caption', 'transcript'], label: 'label (machine generated)') }

before do
if object.is_a?(MasterFile)
supplemental_file.parent_id = object.id
supplemental_file.save
mo = object.media_object
mo.sections = [object]
mo.save
end
end

it "removes transcript note from tags" do
expect {
put :update, params: { class_id => object.id, id: supplemental_file.id, supplemental_file: valid_update_attributes, format: :html }, session: valid_session
}.to change { master_file.reload.supplemental_files.first.tags }.from(['caption', 'transcript']).to(['caption'])
end

it 'removes transcript from parent media object\'s index' do
# Media object only looks at section files for has_transcript check,
# so we only test against the MasterFile case.
if object.is_a?(MasterFile)
expect{
put :update, params: { class_id => object.id, id: supplemental_file.id, supplemental_file:valid_update_attributes, format: :html}, session: valid_session
}.to change { object.media_object.to_solr(include_child_fields: true)['has_transcripts_bsi'] }.from(true).to(false)
end
end
end
end

Expand Down Expand Up @@ -605,5 +626,29 @@
expect(JSON.parse(response.body)["errors"]).to be_present
end
end

context 'supplemental file marked as transcript' do
let(:supplemental_file) { FactoryBot.create(:supplemental_file, :with_transcript_file, tags: ['caption', 'transcript']) }

before do
if object.is_a?(MasterFile)
supplemental_file.parent_id = object.id
supplemental_file.save
mo = object.media_object
mo.sections = [object]
mo.save
end
end

it 'removes transcript from parent media object\'s index' do
# Media object only looks at section files for has_transcript check,
# so we only test against the MasterFile case.
if object.is_a?(MasterFile)
expect{
delete :destroy, params: { class_id => object.id, id: supplemental_file.id, format: :html}, session: valid_session
}.to change { object.media_object.to_solr(include_child_fields: true)['has_transcripts_bsi'] }.from(true).to(false)
end
end
end
end
end

0 comments on commit d36a347

Please sign in to comment.