Skip to content

Commit

Permalink
Merge pull request #5704 from avalonmediasystem/batch_captions
Browse files Browse the repository at this point in the history
Support creation of multiple captions in batch ingest
  • Loading branch information
masaball authored Apr 5, 2024
2 parents e39ddac + 57f9796 commit d3a94c3
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 13 deletions.
1 change: 1 addition & 0 deletions app/jobs/ingest_batch_entry_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def process_success(batch_entry, entry)
old_media_object_id = batch_entry.media_object_pid
batch_entry.media_object_pid = entry.media_object.id
batch_entry.complete = true
batch_entry.current_status = 'Completed'
batch_entry.save!
# Delete pre-existing media object
MediaObject.find(old_media_object_id).destroy if old_media_object_id.present? && MediaObject.exists?(old_media_object_id)
Expand Down
35 changes: 27 additions & 8 deletions lib/avalon/batch/entry.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def self.from_json(json)
json_hash = JSON.parse(json)
opts = json_hash.except("fields", "files", "position")
opts[:collection] = Admin::Collection.find(json_hash["collection"])
self.new(json_hash["fields"].symbolize_keys, json_hash["files"].map(&:symbolize_keys!), opts.symbolize_keys, json_hash["position"], nil)
self.new(json_hash["fields"].symbolize_keys, json_hash["files"].map(&:deep_symbolize_keys!), opts.symbolize_keys, json_hash["position"], nil)
end

def user_key
Expand Down Expand Up @@ -176,17 +176,21 @@ def self.offset_valid?( offset )
true
end

# Attaches sidecar data to master_file: structural metadata found next to the
# media file, plus caption data.
# NOTE(review): a 2-argument and a 3-argument signature both appear below, and the
# single-".vtt" caption code sits next to the per-caption loop. This looks like the
# old and new sides of a rendered diff shown together - confirm which lines apply.
def self.attach_datastreams_to_master_file( master_file, filename )
def self.attach_datastreams_to_master_file( master_file, filename, captions )
# Structural metadata is read from "<filename>.structure.xml" when that file exists.
structural_file = "#{filename}.structure.xml"
if FileLocator.new(structural_file).exist?
master_file.structuralMetadata.content=FileLocator.new(structural_file).reader
master_file.structuralMetadata.original_name = structural_file
end
# A single caption read from "<filename>.vtt" and stored on master_file.captions as text/vtt.
captions_file = "#{filename}.vtt"
if FileLocator.new(captions_file).exist?
master_file.captions.content=FileLocator.new(captions_file).reader
master_file.captions.mime_type='text/vtt'
master_file.captions.original_name = captions_file
# Each element of captions is read through the keys :caption_file, :caption_label
# and :caption_language.
captions.each do |c|
# Skip blank entries, entries without a caption file, and files FileLocator reports as missing.
next unless c.present? && c[:caption_file].present? && FileLocator.new(c[:caption_file]).exist?
# Reassigns the `filename` parameter to the last path segment of the caption file.
filename = c[:caption_file].split('/').last
# The label falls back to the caption file name when none was supplied.
label = c[:caption_label].presence || filename
# A supplied language goes through caption_language; otherwise the configured default is used.
language = c[:caption_language].present? ? caption_language(c[:caption_language]) : Settings.caption_default.language
supplemental_file = SupplementalFile.new(label: label, tags: ['caption'], language: language)
supplemental_file.file.attach(io: FileLocator.new(c[:caption_file]).reader, filename: filename)
# NOTE(review): the return value of save is not checked before the file is added below.
supplemental_file.save
master_file.supplemental_files += [supplemental_file]
end
end

Expand All @@ -198,7 +202,8 @@ def process!
# master_file.save(validate: false) #required: need id before setting media_object
# master_file.media_object = media_object
files = self.class.gatherFiles(file_spec[:file])
self.class.attach_datastreams_to_master_file(master_file, file_spec[:file])
captions = gather_captions(file_spec).values
self.class.attach_datastreams_to_master_file(master_file, file_spec[:file], captions)
master_file.setContent(files, dropbox_dir: media_object.collection.dropbox_absolute_path)

# Overwrite files hash with working file paths to pass to matterhorn
Expand Down Expand Up @@ -263,11 +268,25 @@ def self.derivativePath(filename, quality)
filename.dup.insert(filename.rindex('.'), ".#{quality}")
end

# Resolves a caption language value to the `code` of the matching LanguageTerm.
# The value is capitalized before the lookup. When the lookup raises
# LanguageTerm::LookupError, the configured default caption language is
# returned instead.
def self.caption_language(language)
  term = LanguageTerm.find(language.capitalize)
  term.code
rescue LanguageTerm::LookupError
  Settings.caption_default.language
end
private_class_method :caption_language

private

# Reports the :hidden option as a strict boolean (true or false, never nil).
def hidden
  opts[:hidden] ? true : false
end

# Picks the caption entries out of a single file specification.
#
# @param file [Hash] one file entry; caption entries are the pairs whose key
#   contains "caption" once converted to a string
# @return [Hash] the matching key/value pairs, or an empty Hash when there are
#   none (the caller invokes `.values` on the result, so a Hash is required)
def gather_captions(file)
  # Hash#select already yields an empty Hash when nothing matches, so no
  # separate "no caption keys" guard is needed.
  file.select { |key, _value| key.to_s.include?('caption') }
end
end
end
end
20 changes: 18 additions & 2 deletions lib/avalon/batch/manifest.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class Manifest
extend Forwardable

EXTENSIONS = ['csv','xls','xlsx','ods']
FILE_FIELDS = [:file,:label,:offset,:skip_transcoding,:absolute_location,:date_digitized]
FILE_FIELDS = [:file,:label,:offset,:skip_transcoding,:absolute_location,:date_digitized, :caption_file, :caption_label, :caption_language]
SKIP_FIELDS = [:collection]

def_delegators :@entries, :each
Expand Down Expand Up @@ -102,6 +102,18 @@ def true?(value)
not (value.to_s =~ /^(y(es)?|t(rue)?)$/i).nil?
end

# Records one caption-related manifest column on the most recent file entry.
#
# A column whose name contains "file" opens a new caption hash on
# `content.last` under the key :caption_N (N is the incremented
# @caption_count) and stores the resolved caption path in it. Any other
# caption column is stored on the caption hash opened most recently, without
# overwriting a value that is already set.
#
# Caption metadata that has no caption hash to land on is ignored instead of
# raising NoMethodError on nil. This covers a metadata column with no caption
# file before it, a @caption_key left over from an earlier file entry, and a
# caption column that precedes every file column.
#
# @param field [Symbol] caption column name
# @param content [Array<Hash>] file entries built so far; the last one is updated
# @param values [Array] cell values of the current row
# @param i [Integer] index of this column within values
def process_captions(field, content, values, i)
  entry = content.last
  # No file entry has been started yet, so there is nothing to attach to.
  return if entry.nil?

  if field.to_s.include?('file')
    @caption_count += 1
    @caption_key = "caption_#{@caption_count}".to_sym
    entry[@caption_key] = {}
    # Set file path to caption file
    entry[@caption_key][field] = path_to(values[i])
  end

  caption = entry[@caption_key]
  # Metadata without a caption file on this entry is dropped.
  return if caption.nil?

  # Set caption metadata fields
  caption[field] ||= values[i]
end

def create_entries!
first = @spreadsheet.first_row + 2
last = @spreadsheet.last_row
Expand All @@ -117,10 +129,15 @@ def create_entries!
content=[]

fields = Hash.new { |h,k| h[k] = [] }
@caption_count = 0
@field_names.each_with_index do |f,i|
unless f.blank? || SKIP_FIELDS.include?(f) || values[i].blank?
if FILE_FIELDS.include?(f)
content << {} if f == :file
if f.to_s.include?('caption')
process_captions(f, content, values, i)
next
end
content.last[f] = f == :skip_transcoding ? true?(values[i]) : values[i]
else
fields[f] << values[i]
Expand All @@ -140,7 +157,6 @@ def create_entries!
entries << Entry.new(fields.select { |f| !FILE_FIELDS.include?(f) }, files, opts, index, self)
end
end

end
end
end
Binary file modified spec/fixtures/dropbox/example_batch_ingest/batch_manifest.xlsx
Binary file not shown.
32 changes: 29 additions & 3 deletions spec/lib/avalon/batch/entry_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -176,20 +176,46 @@
expect(master_file.absolute_location).to eq(Avalon::FileResolver.new.path_to(master_file.file_location))
expect(master_file.date_digitized).to eq('2015-10-30T00:00:00Z')
end

context 'with caption files' do
  # One caption hash is nested under :caption_1 in the file entry.
  let(:caption_file) do
    File.join(Rails.root, 'spec/fixtures/dropbox/example_batch_ingest/assets/sheephead_mountain.mov.vtt')
  end
  let(:caption) do
    { caption_file: caption_file, caption_label: 'Sheephead Captions', caption_language: 'English' }
  end
  let(:entry_files) do
    [
      {
        file: File.join(testdir, filename),
        offset: '00:00:00.500',
        label: 'Quis quo',
        date_digitized: '2015-10-30',
        skip_transcoding: false,
        caption_1: caption
      }
    ]
  end

  it 'adds captions to masterfile' do
    expect(master_file.supplemental_file_captions).to be_present
  end
end
end

describe '#attach_datastreams_to_master_file' do
let(:master_file) { FactoryBot.build(:master_file) }
let(:master_file) { FactoryBot.create(:master_file) }
let(:filename) { File.join(Rails.root, 'spec/fixtures/dropbox/example_batch_ingest/assets/sheephead_mountain.mov') }
let(:caption_file) { File.join(Rails.root, 'spec/fixtures/dropbox/example_batch_ingest/assets/sheephead_mountain.mov.vtt')}
let(:caption) { [{ :caption_file => caption_file, :caption_label => 'Sheephead Captions', :caption_language => 'English' }] }

before do
Avalon::Batch::Entry.attach_datastreams_to_master_file(master_file, filename)
Avalon::Batch::Entry.attach_datastreams_to_master_file(master_file, filename, caption)
end

it 'should attach structural metadata' do
expect(master_file.structuralMetadata.has_content?).to be_truthy
end
it 'should attach captions' do
expect(master_file.captions.has_content?).to be_truthy
expect(master_file.supplemental_file_captions).to be_present
end

context 'with multiple captions' do
  # The same caption file is supplied twice with different labels; the language
  # is given once as a lowercase name and once as a three-letter code.
  let(:caption) do
    [
      { caption_file: caption_file, caption_label: 'Sheephead Captions', caption_language: 'english' },
      { caption_file: caption_file, caption_label: 'Second Caption', caption_language: 'fre' }
    ]
  end

  it 'should attach all captions to master file' do
    attached = master_file.supplemental_file_captions

    expect(attached).to be_present
    expect(attached.count).to eq 2
    expect(attached[0].label).to eq 'Sheephead Captions'
    expect(attached[1].label).to eq 'Second Caption'
    expect(attached[0].language).to eq 'eng'
    expect(attached[1].language).to eq 'fre'
  end
end
end

Expand Down

0 comments on commit d3a94c3

Please sign in to comment.