Skip to content

Commit 7f7bf49

Browse files
authored
Merge pull request #1157 from MITLibraries/timx-211-nil-date-handling
Better error handling for dates
2 parents 535210d + 21e4acf commit 7f7bf49

File tree

4 files changed

+553
-6
lines changed

4 files changed

+553
-6
lines changed

app/models/normalize_timdex.rb

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,18 @@ def extract_title(record)
3838
end
3939

4040
def extract_dates(dates)
  # Builds the "gte-lte" display string for a record's creation or publication date range.
  #
  # dates - Array of date hashes (string keys 'kind' and, optionally, 'range'); may be nil.
  #
  # Returns a String such as "1940-1983", or nil when there is no creation/publication date or that date
  # has no range.

  # A record may arrive without any dates value. Array() turns nil into [], so that case falls through to
  # the nil return below instead of raising NoMethodError.
  # It is unlikely for a record to have more than one creation or publication date, but if it does,
  # just take the first one.
  relevant_date = Array(dates).find { |date| %w[creation publication].include?(date['kind']) }

  # If the record has no creation or publication date, stop here.
  return if relevant_date.nil?

  # We are only concerned with creation/pub dates that are ranges, since we harvest ASpace metadata at the
  # collection level.
  range = relevant_date['range']
  return unless range.present?

  "#{range['gte']}-#{range['lte']}"
end

test/models/normalize_timdex_test.rb

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,46 @@ def aspace_records
3636
assert_match 'The Paul Earls archives contains a large number of music manuscript',
3737
aspace_records['results'][0].blurb
3838
end
39+
40+
# Some ASpace records have a publication date range rather than a creation date range. The following regression test
41+
# ensures that the NormalizeTimdex model does not error for such records.
42+
test 'records with no creation date do not error' do
  query = 'SOVEREIGN INTIMACY: PRIVATE MEDIA AND THE TRACES OF COLONIAL VIOLENCE'

  VCR.use_cassette('aspace publication date', allow_playback_repeats: true) do
    response = SearchTimdex.new.search(query)
    first_date = response['data']['search']['records'].first['dates'].first

    # Precondition: the first date on the first returned record is not a creation date.
    assert_not first_date['kind'] == 'creation'

    # Normalizing that response must complete without raising.
    assert_nothing_raised { NormalizeTimdex.new.to_result(response, query) }
  end
end
52+
53+
# This is another regression test. Now that we know that records with no creation date will not error, we also want
54+
# to ensure that records with a publication date will be normalized accordingly.
55+
test 'publication dates are normalized' do
  query = 'SOVEREIGN INTIMACY: PRIVATE MEDIA AND THE TRACES OF COLONIAL VIOLENCE'

  VCR.use_cassette('aspace publication date', allow_playback_repeats: true) do
    response = SearchTimdex.new.search(query)
    normalized = NormalizeTimdex.new.to_result(response, query)
    first_date = response['data']['search']['records'].first['dates'].first

    # The raw record carries a publication date range...
    assert_equal 'publication', first_date['kind']
    assert_equal '1940', first_date['range']['gte']
    assert_equal '1983', first_date['range']['lte']

    # ...and the normalized result exposes it as a "gte-lte" year string.
    assert_equal '1940-1983', normalized['results'][0].year
  end
end
67+
68+
test 'normalizer selects the first of two relevant dates' do
  query = 'Timurid Architecture Research Archive'

  VCR.use_cassette('aspace multiple creation dates') do
    response = SearchTimdex.new.search(query)
    dates = response['data']['search']['records'].first['dates']

    # Precondition: the first record has at least two dates, and the first two differ.
    assert dates.count > 1
    assert_not_equal dates.first, dates.second

    # NOTE(review): the expectation is built from the first raw date, which presumes that date is a
    # creation/publication date carrying a range -- confirm against the cassette.
    first_range = dates.first['range']
    expected_year = "#{first_range['gte']}-#{first_range['lte']}"

    normalized = NormalizeTimdex.new.to_result(response, query)
    assert_equal expected_year, normalized['results'][0].year
  end
end
3981
end

0 commit comments

Comments
 (0)