Skip to content

Commit

Permalink
Apply substitutions from CSV file for 260c date field. Part of #94
Browse files Browse the repository at this point in the history
  • Loading branch information
osma committed Jul 3, 2019
1 parent b0ced3c commit f87b161
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 2 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ refdata/RDAMediaType.nt:
# Preprocess a raw Aleph sequential dump (%-in.alephseq) in four piped stages:
#   1. uniq                            - collapse adjacent identical lines
#   2. scripts/filter-duplicates.py    - project-specific duplicate filtering
#   3. $(UCONV) -x Any-NFC -i          - transliterate the text to Unicode NFC
#   4. scripts/filter-fennica-repl.py  - project-specific replacement filtering
%-preprocessed.alephseq: %-in.alephseq
	uniq $< \
	| scripts/filter-duplicates.py \
	| $(UCONV) -x Any-NFC -i \
	| scripts/filter-fennica-repl.py \
	>$@

# Pre-change form of the %.mrcx rule (the removed side of this diff hunk).
# Converts the preprocessed Aleph sequential file ($<) to MARCXML ($@) with
# Catmandu, applying the filter-marc, strip-personal-info and preprocess-marc
# fix scripts in that order.
%.mrcx: %-preprocessed.alephseq refdata/iso639-2-fi.csv
$(CATMANDU) convert MARC --type ALEPHSEQ to MARC --type XML --fix scripts/filter-marc.fix --fix scripts/strip-personal-info.fix --fix scripts/preprocess-marc.fix <$< >$@
# Catmandu fix scripts applied, in this order, when producing MARCXML.
# Listed once so that the prerequisite list and the --fix options below can
# never drift apart.
mrcx_fixes := scripts/filter-marc.fix \
              scripts/strip-personal-info.fix \
              scripts/preprocess-marc.fix \
              scripts/substitute-marc.fix

# Convert a preprocessed Aleph sequential file to MARCXML.
#
# The first prerequisite ($<) is the only file fed to Catmandu on stdin. The
# CSV reference data and the fix scripts are read while the recipe runs, so
# they are declared as prerequisites too: editing any of them must trigger a
# rebuild of the .mrcx files.
%.mrcx: %-preprocessed.alephseq refdata/iso639-2-fi.csv refdata/subst-260c.csv $(mrcx_fixes)
	$(CATMANDU) convert MARC --type ALEPHSEQ to MARC --type XML $(addprefix --fix ,$(mrcx_fixes)) <$< >$@

%-bf2.rdf: %.mrcx
$(XSLTPROC) --stringparam baseuri $(URIBASEFENNICA) $(MARC2BIBFRAME2)/xsl/marc2bibframe2.xsl $^ >$@
Expand Down
44 changes: 44 additions & 0 deletions scripts/extract-subst-260c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/usr/bin/env python3

import sys
import csv

# Number of columns a data row must have: the script reads indexes 0, 1, 4 and 5.
_MIN_COLUMNS = 6

# Shapes of an original 260c value that differ from the cleaned value only by
# trailing punctuation or surrounding brackets.  These are already handled by
# the conversion, so no substitution entry is emitted for them.
_TRIVIAL_FORMS = ("{}", "{}.", "{}-", "{}-.", "[{}]", "[{}].")


def build_value(date_from, date_till):
    """Return the replacement 260c value: "from", or "from-till" if till is set."""
    if date_till:
        return date_from + "-" + date_till
    return date_from


def is_trivial(orig260c, val):
    """Return True if orig260c is just val with trivial punctuation/brackets."""
    return any(orig260c == form.format(val) for form in _TRIVIAL_FORMS)


def main(infile=None, outfile=None):
    """Turn a tab-separated corrections table into a substitution CSV.

    Reads tab-separated rows from ``infile`` (default: stdin), skipping the
    first row as a header.  For each data row the record id (column 0),
    original 260c (column 1), new start date (column 4) and new end date
    (column 5) are used to write one fully-quoted CSV row
    ``"<recid>/<orig260c>","<new value>"`` to ``outfile`` (default: stdout).

    Rows whose original value is a trivial variant of the new value are
    omitted.  Blank lines are skipped silently; rows with too few columns
    are reported on stderr and skipped instead of aborting the whole run
    with an IndexError.
    """
    if infile is None:
        infile = sys.stdin
    if outfile is None:
        outfile = sys.stdout

    reader = csv.reader(infile, dialect='excel-tab')
    writer = csv.writer(outfile, quoting=csv.QUOTE_ALL)

    next(reader, None)  # skip header

    for row in reader:
        if not row:
            # csv.reader yields an empty list for a blank line
            continue

        if len(row) < _MIN_COLUMNS:
            print("line %d: expected at least %d columns, got %d; skipped"
                  % (reader.line_num, _MIN_COLUMNS, len(row)),
                  file=sys.stderr)
            continue

        recid = row[0]
        orig260c = row[1]
        val = build_value(row[4], row[5])

        # skip trivial cases (already handled by conversion)
        if is_trivial(orig260c, val):
            continue

        writer.writerow([recid + "/" + orig260c, val])


if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions scripts/substitute-marc.fix
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Apply cleanup substitutions from CSV files to MARC records
#
# For each record, looks up a replacement for the 260$c value in
# refdata/subst-260c.csv and, if one is found, overwrites 260$c with it.
# The lookup key has the form "<001 value>/<original 260c value>".
# NOTE(review): the CSV is presumably produced by
# scripts/extract-subst-260c.py, which writes keys in that same shape - confirm.

# Copy the control number (001) and the current 260$c into temporary fields
marc_map('001',recid)
marc_map('260c',orig260c)
# Only records that actually have a 260$c are candidates for substitution
if exists(orig260c)
# Build the lookup key by joining recid and orig260c with "/"
paste(substval,recid,orig260c,join_char:"/")
# Replace the key with the matching CSV value; delete:1 removes substval
# when the key has no entry in the CSV
lookup(substval,'refdata/subst-260c.csv',delete:1)
# substval survives only if a substitution was found
if exists(substval)
# Write the replacement value back into 260$c
marc_set('260c',$.substval)
end
end
6 changes: 6 additions & 0 deletions test/15_mrcx.bats
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,9 @@ setup () {
make slices/sjubroder-00450.mrcx
xmllint --format slices/sjubroder-00450.mrcx | grep -q 'tag="490"'
}

# Checks that, after the slice is built, no 260 subfield c line in the MARCXML
# still contains the string "Merkur".
@test "MARCXML: cleans up bad 260c values" {
# Build the MARCXML for the test slice
make slices/suoja-pirtti-00000.mrcx
# Pretty-print the XML, keep each tag=260 line plus the 3 lines after it,
# narrow to code=c lines, then search those for "Merkur".  The exit status
# captured by `run` is that of the last grep in the pipeline.
run bash -c "xmllint --format slices/suoja-pirtti-00000.mrcx | grep -A 3 'tag=.260.' | grep 'code=.c.' | grep 'Merkur'"
# A non-zero status means the final grep matched nothing, i.e. the bad value is gone.
# NOTE(review): this also passes if xmllint fails or the record has no 260
# field at all, since an empty pipeline makes grep fail too - confirm that is acceptable.
[ "$status" -ne 0 ]
}

0 comments on commit f87b161

Please sign in to comment.