Skip to content

Commit

Permalink
NOREF-Better error handling & logging (#365)
Browse files Browse the repository at this point in the history
Return early if no query is formed and move noisy print statements in core process into debug log
  • Loading branch information
Apophenia authored Sep 16, 2024
1 parent 61f0cf5 commit e6cb3fa
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
- Finalizing OCLC implementation
- Implemented script to aggregate access logs
- Switching over to classify record by metadata v2
- Adding more specific logging and exception handling around OCLC manager errors

## Fixed
- Changed HATHI_DATAFILES outdated link in development, example, and local yaml files
Expand Down
35 changes: 24 additions & 11 deletions managers/oclc_catalog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import requests
from requests.exceptions import Timeout, ConnectionError
from requests.exceptions import Timeout, ConnectionError, JSONDecodeError
from typing import Optional

from logger import createLog
Expand Down Expand Up @@ -96,17 +96,24 @@ def _get_other_editions(self, oclc_number: int, offset: int=0):
)

if other_editions_response.status_code != 200:
logger.warning(f'OCLC other editions request failed with status {other_editions_response.status_code}')
logger.warning(
f'OCLC search bibs request for OCLC no {oclc_number} failed '
f'with status {other_editions_response.status_code}')
try:
oclc_error_type = other_editions_response.json()["type"]
logger.debug(f'{oclc_number} request failure reason: {oclc_error_type}')
except (JSONDecodeError, KeyError):
logger.debug(f'No OCLC error type given for {oclc_number} request')
return None

return other_editions_response.json()
except Exception as e:
except Exception as e:
logger.error(f'Failed to query other editions endpoint {other_editions_url}', e)
return None

def _get_oclc_number_from_bibs(self, oclc_number: int, oclc_bibs) -> list:
    """Return the OCLC numbers found in ``oclc_bibs``, excluding ``oclc_number``.

    Each entry of ``oclc_bibs`` is indexed with ``'oclcNumber'`` and that value
    is converted with ``int``. Entries whose converted number equals
    ``oclc_number`` are dropped; the remaining numbers are returned in their
    original order.

    Returns:
        A list of ints (the earlier ``-> int`` annotation did not match the
        value actually returned).

    Raises:
        KeyError: if an entry has no ``'oclcNumber'`` key.
        ValueError: if an ``'oclcNumber'`` value cannot be converted to int.
    """
    # Convert each edition's number once rather than twice per entry.
    edition_numbers = (int(edition['oclcNumber']) for edition in oclc_bibs)
    return [number for number in edition_numbers if number != oclc_number]

def query_bibs(self, query: str):
bibs = []

Expand Down Expand Up @@ -135,7 +142,7 @@ def query_bibs(self, query: str):
except Exception as e:
logger.error(f'Failed to query search bibs with query {query}', e)
return bibs

def _search_bibs(self, query: str, offset: int=0):
try:
token = OCLCAuthManager.get_token()
Expand All @@ -155,14 +162,20 @@ def _search_bibs(self, query: str, offset: int=0):
)

if bibs_response.status_code != 200:
logger.warning(f'OCLC search bibs request failed with status {bibs_response.status_code}')
logger.warning(
f'OCLC search bibs request for query {query} failed '
f'with status {bibs_response.status_code}')
try:
oclc_error_type = bibs_response.json()["type"]
logger.debug(f'Query failure reason: {oclc_error_type}')
except (JSONDecodeError, KeyError):
logger.debug('No OCLC error type given')
return None

return bibs_response.json()
except Exception as e:
logger.error(f'Failed to query {bibs_endpoint} with query {query}', e)
logger.error(f'Failed to query {bibs_endpoint} with query {query}. Exception: {e}')
return None

def generate_search_query(self, identifier=None, identifier_type=None, title=None, author=None):
if identifier and identifier_type:
return self._generate_identifier_query(identifier, identifier_type)
Expand Down
11 changes: 8 additions & 3 deletions processes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
from model import Record
from static.manager import StaticManager

from logger import createLog


logger = createLog(__name__)


class CoreProcess(DBManager, NyplApiManager, RabbitMQManager, RedisManager, StaticManager,
ElasticsearchManager, S3Manager):
Expand All @@ -19,17 +24,17 @@ def addDCDWToUpdateList(self, rec):
existing = self.session.query(Record)\
.filter(Record.source_id == rec.record.source_id).first()
if existing:
print('EXISTING', existing)
logger.debug('Existing record: ' + str(existing))
rec.updateExisting(existing)

try:
self.records.remove(existing)
except KeyError:
print('Record not in current set')
logger.debug('Record not in current set')

self.records.add(existing)
else:
print('NEW', rec.record)
logger.debug('New record: ' + str(rec.record))
self.records.add(rec.record)

if len(self.records) >= self.batchSize:
Expand Down
3 changes: 2 additions & 1 deletion processes/oclcClassify.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from .core import CoreProcess
from managers import ClassifyManager, OCLCCatalogManager
from managers.oclc_catalog import OCLCCatalogError
from managers.oclcClassify import ClassifyError
from mappings.oclcClassify import ClassifyMapping
from mappings.oclc_bib import OCLCBibMapping
Expand Down Expand Up @@ -114,7 +115,7 @@ def frbrizeRecord(self, record):

try:
self.classify_record_by_metadata_v2(identifier, idenType, author, record.title)
except ClassifyError as err:
except (ClassifyError, OCLCCatalogError) as err:
logger.warning('Unable to Classify {}'.format(record))
logger.debug(err.message)

Expand Down

0 comments on commit e6cb3fa

Please sign in to comment.