Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2171: Fixing OCLC bib author mapping #367

Merged
merged 7 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
- Changed HATHI_DATAFILES outdated link in development, example, and local yaml files
- Fixed edition API ID param
- Fixed usage type bug
- Fixed OCLC bib author mapping

## 2024-08-06 -- v0.13.1
## Added
Expand Down
88 changes: 57 additions & 31 deletions mappings/oclc_bib.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,47 +8,41 @@

class OCLCBibMapping(Core):
def __init__(self, oclc_bib, related_oclc_numbers=None):
    """Map an OCLC bib payload to a record and keep it on ``self.record``.

    Args:
        oclc_bib: OCLC bib payload (dict).
        related_oclc_numbers: optional iterable of related OCLC numbers,
            forwarded unchanged in content to ``_map_to_record``.
    """
    # ``None`` replaces the shared mutable ``[]`` default; a fresh list is
    # built per call. The former ``oclc_bib['identifier']`` lookup was an
    # unused local that raised KeyError on payloads without that key.
    self.record = self._map_to_record(oclc_bib, list(related_oclc_numbers or []))

def _map_to_record(self, oclc_bib, related_oclc_numbers=None) -> Record:
    """Build a ``Record`` from an OCLC bib payload.

    Args:
        oclc_bib: OCLC bib payload (dict). Missing or ``None`` sections
            ('identifier', 'work', 'title') are tolerated.
        related_oclc_numbers: optional iterable of related OCLC numbers; each
            is added as a ``<number>|oclc`` identifier.

    Returns:
        A new ``Record`` with a fresh UUID and identical created/modified
        timestamps (naive UTC).
    """
    identifiers = oclc_bib.get('identifier') or {}
    # NOTE(review): a missing work id / OCLC number still produces
    # 'None|owi' / 'None|oclc' — confirm whether those should be dropped.
    work_identifier = f"{(oclc_bib.get('work') or {}).get('id')}|owi"
    # ``or [{}]`` also covers an *empty* mainTitles list, which a plain
    # ``.get('mainTitles', [{}])[0]`` would turn into an IndexError.
    main_titles = (oclc_bib.get('title') or {}).get('mainTitles') or [{}]

    creators = self._get_creators(oclc_bib)
    authors = self._get_authors(creators)
    contributors = self._get_contributors(creators)

    # One timestamp so date_created and date_modified cannot differ.
    now = datetime.now(timezone.utc).replace(tzinfo=None)

    return Record(
        uuid=uuid4(),
        frbr_status='complete',
        cluster_status=False,
        source='oclcClassify',
        source_id=work_identifier,
        title=main_titles[0].get('text'),
        subjects=self._map_subjects(oclc_bib),
        authors=self._map_authors(authors),
        contributors=self._map_contributors(contributors),
        identifiers=(
            [work_identifier] +
            [f"{identifiers.get('oclcNumber')}|oclc"] +
            [f"{oclc_number}|oclc" for oclc_number in related_oclc_numbers or []]
        ),
        date_created=now,
        date_modified=now,
    )

def createMapping(self):
    """Do nothing; this mapping builds its record in the constructor.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """

def applyFormatting(self):
    """Do nothing; no post-mapping formatting is applied by this class.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """

def applyMapping(self):
    """Do nothing; the record is already mapped when the object is created.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """

def _get_creators(self, oclc_bib):
    """Return the creators that carry at least one name part.

    Args:
        oclc_bib: OCLC bib payload (dict).

    Returns:
        ``None`` when the payload has no (or an empty) 'contributor' entry;
        otherwise the list of creator dicts that have a truthy 'firstName'
        or 'secondName'.
    """
    contributor = oclc_bib.get('contributor')
    if not contributor:
        return None

    # ``or`` (not ``and``): creators known by a single name must be kept.
    # ``or []`` tolerates an explicit ``'creators': None``.
    return [
        creator
        for creator in contributor.get('creators') or []
        if creator.get('secondName') or creator.get('firstName')
    ]

Expand All @@ -58,9 +52,7 @@ def _get_authors(self, creators):

return list(
filter(
lambda creator:
creator.get('isPrimary', False) or
self._is_author(creator),
lambda creator: creator.get('isPrimary', False) or self._is_author(creator),
creators
)
)
Expand All @@ -71,34 +63,68 @@ def _get_contributors(self, creators):

return list(
filter(
lambda creator:
not creator.get('isPrimary', False) and
not self._is_author(creator),
lambda creator: not creator.get('isPrimary', False) and not self._is_author(creator),
creators
)
)

def _is_author(self, creator):
    """Return True if any relator term of *creator* names an author or writer.

    The match is a case-insensitive substring test for 'author' or 'writer'
    against each relator's 'term'. Relators without a term never match.
    """
    for relator in creator.get('relators') or []:
        # A missing or None term becomes '' instead of raising AttributeError.
        role = (relator.get('term') or '').lower()
        if 'author' in role or 'writer' in role:
            return True

    return False

def _map_subjects(self, oclc_bib) -> list[str]:
    """Format each named subject as ``<subject text>||<vocabulary>``.

    Subjects without a 'subjectName' text are skipped; a missing vocabulary
    is rendered as an empty string.
    """
    return [
        f"{subject_name}||{subject.get('vocabulary', '')}"
        for subject in oclc_bib.get('subjects') or []
        # ``or {}`` tolerates an explicit ``'subjectName': None``.
        if (subject_name := (subject.get('subjectName') or {}).get('text'))
    ]

def _map_authors(self, authors) -> Optional[list[str]]:
    """Format authors as ``<name>|||true`` strings.

    Args:
        authors: list of creator dicts, or a falsy value.

    Returns:
        ``None`` when *authors* is empty/None; otherwise one entry per author
        for which ``_get_contributor_name`` yields a name (others are skipped).
    """
    if not authors:
        return None

    # The name is resolved through _get_contributor_name rather than by
    # indexing ['secondName']['text'], which raised KeyError for authors
    # that have only one name part.
    return [
        f'{author_name}|||true'
        for author in authors
        if (author_name := self._get_contributor_name(author))
    ]

def _map_contributors(self, contributors) -> Optional[list[str]]:
    """Format contributors as ``<name>|||<comma-separated relator terms>``.

    Args:
        contributors: list of creator dicts, or a falsy value.

    Returns:
        ``None`` when *contributors* is empty/None; otherwise one entry per
        contributor for which ``_get_contributor_name`` yields a name.
    """
    if not contributors:
        return None

    mapped = []
    for contributor in contributors:
        contributor_name = self._get_contributor_name(contributor)
        if not contributor_name:
            # Nameless contributors cannot be represented; skip them.
            continue

        roles = ', '.join(
            relator.get('term', '') for relator in contributor.get('relators', [])
        )
        mapped.append(f'{contributor_name}|||{roles}')

    return mapped

def _get_contributor_name(self, contributor) -> Optional[str]:
    """Return a display name for a creator dict.

    Returns ``'<second>, <first>'`` when both name parts resolve, the single
    available part when only one does, and ``None`` when neither does.
    """
    first_name = self._get_name(contributor.get('firstName'))
    second_name = self._get_name(contributor.get('secondName'))

    if first_name and second_name:
        return f'{second_name}, {first_name}'

    # At most one part is set here; fall through to None when both are empty.
    single_name = first_name or second_name
    return f'{single_name}' if single_name else None

def _get_name(self, name_data) -> Optional[str]:
    """Return the 'text' of a name entry, falling back to 'romanizedText'.

    Args:
        name_data: dict for one name part, or a falsy value.

    Returns:
        The first truthy of ``name_data['text']`` and
        ``name_data['romanizedText']``; ``None`` when *name_data* is falsy or
        neither key is present.
    """
    if not name_data:
        return None

    return name_data.get('text') or name_data.get('romanizedText')

def createMapping(self):
    """Do nothing; this mapping builds its record in the constructor.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """

def applyFormatting(self):
    """Do nothing; no post-mapping formatting is applied by this class.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """

def applyMapping(self):
    """Do nothing; the record is already mapped when the object is created.

    NOTE(review): presumably kept to satisfy the ``Core`` interface — confirm.
    """
84 changes: 84 additions & 0 deletions tests/unit/test_oclc_bib_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from mappings.oclc_bib import OCLCBibMapping

# Minimal OCLC bib payload shared by the tests below: one identifier, one
# work id, one title, one subject and a single primary creator with both
# name parts.
base_oclc_bib = {
    'identifier': {'oclcNumber': 1234},
    'work': {'id': 1},
    'title': {
        'mainTitles': [
            {'text': 'The Story of DRB'},
        ],
    },
    'subjects': [
        {
            'subjectName': {'text': 'Subject'},
            'vocabulary': 'fast',
        },
    ],
    'contributor': {
        'creators': [
            {
                'firstName': {'text': 'Hathi'},
                'secondName': {'text': 'Trust'},
                'isPrimary': True,
            },
        ],
    },
}


def test_oclc_bib_mapping_full_name():
    """A primary creator with both name parts maps to 'Second, First|||true'."""
    # State the creators explicitly on a per-test payload so this test does not
    # depend on the shared module-level fixture being in its initial state.
    oclc_bib = {
        **base_oclc_bib,
        'contributor': {
            'creators': [{
                'firstName': {'text': 'Hathi'},
                'secondName': {'text': 'Trust'},
                'isPrimary': True,
            }]
        },
    }

    oclc_bib_mapping = OCLCBibMapping(oclc_bib)

    assert ['Trust, Hathi|||true'] == oclc_bib_mapping.record.authors


def test_oclc_bib_mapping_no_first_name():
    """A creator with only a secondName maps to that name alone."""
    # Build a per-test payload instead of assigning into the shared
    # module-level fixture, so results do not depend on test execution order.
    oclc_bib = {
        **base_oclc_bib,
        'contributor': {
            'creators': [{
                'secondName': {'text': 'Trust'},
                'isPrimary': True,
            }]
        },
    }

    oclc_bib_mapping = OCLCBibMapping(oclc_bib)

    assert ['Trust|||true'] == oclc_bib_mapping.record.authors

def test_oclc_bib_mapping_no_second_name():
    """A creator with only a firstName maps to that name alone."""
    # Build a per-test payload instead of assigning into the shared
    # module-level fixture, so results do not depend on test execution order.
    oclc_bib = {
        **base_oclc_bib,
        'contributor': {
            'creators': [{
                'firstName': {'text': 'Hathi'},
                'isPrimary': True,
            }]
        },
    }

    oclc_bib_mapping = OCLCBibMapping(oclc_bib)

    assert ['Hathi|||true'] == oclc_bib_mapping.record.authors

def test_oclc_bib_mapping_fallback_to_romanized_text():
    """Name parts without 'text' fall back to their 'romanizedText'."""
    # Build a per-test payload instead of assigning into the shared
    # module-level fixture, so results do not depend on test execution order.
    oclc_bib = {
        **base_oclc_bib,
        'contributor': {
            'creators': [{
                'firstName': {'romanizedText': 'Homer'},
                'secondName': {'romanizedText': 'Simpson'},
                'isPrimary': True,
            }]
        },
    }

    oclc_bib_mapping = OCLCBibMapping(oclc_bib)

    assert ['Simpson, Homer|||true'] == oclc_bib_mapping.record.authors
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding these!

Loading