NOREF-Better error handling & logging (#365)

Return early if no query is formed, and move noisy print statements in the core process into the debug log.
NYPL · Sep 16, 2024 · e6cb3fa · e6cb3fa
1 parent 61f0cf5
commit e6cb3fa
Show file tree

Hide file tree

Showing 4 changed files with 35 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -36,6 +36,7 @@
 - Finalizing OCLC implementation
 - Implemented script to aggregate access logs
 - Switching over to classify record by metadata v2
+- Adding more specific logging and exception handling around OCLC manager errors
 
 ## Fixed
 - Changed HATHI_DATAFILES outdated link in development, example, and local yaml files 

diff --git a/managers/oclc_catalog.py b/managers/oclc_catalog.py
@@ -1,6 +1,6 @@
 import os
 import requests
-from requests.exceptions import Timeout, ConnectionError
+from requests.exceptions import Timeout, ConnectionError, JSONDecodeError
 from typing import Optional
 
 from logger import createLog
@@ -96,17 +96,24 @@ def _get_other_editions(self, oclc_number: int, offset: int=0):
             )
 
             if other_editions_response.status_code != 200:
-                logger.warning(f'OCLC other editions request failed with status {other_editions_response.status_code}')
+                logger.warning(
+                            f'OCLC search bibs request for OCLC no {oclc_number} failed '
+                            f'with status {other_editions_response.status_code}')
+                try:
+                            oclc_error_type = other_editions_response.json()["type"]
+                            logger.debug(f'{oclc_number} request failure reason: {oclc_error_type}')
+                except (JSONDecodeError, KeyError):
+                    logger.debug(f'No OCLC error type given for {oclc_number} request')
                 return None
-            
+
             return other_editions_response.json()
-        except Exception as e: 
+        except Exception as e:
             logger.error(f'Failed to query other editions endpoint {other_editions_url}', e)
             return None
-        
+
     def _get_oclc_number_from_bibs(self, oclc_number: int, oclc_bibs) -> int:
         return [int(edition['oclcNumber']) for edition in oclc_bibs if int(edition['oclcNumber']) != oclc_number]
-  
+
     def query_bibs(self, query: str):
         bibs = []
 
@@ -135,7 +142,7 @@ def query_bibs(self, query: str):
         except Exception as e:
             logger.error(f'Failed to query search bibs with query {query}', e)
             return bibs
-    
+
     def _search_bibs(self, query: str, offset: int=0):
         try:
             token = OCLCAuthManager.get_token()
@@ -155,14 +162,20 @@ def _search_bibs(self, query: str, offset: int=0):
             )
 
             if bibs_response.status_code != 200:
-                logger.warning(f'OCLC search bibs request failed with status {bibs_response.status_code}')
+                logger.warning(
+                            f'OCLC search bibs request for query {query} failed '
+                            f'with status {bibs_response.status_code}')
+                try:
+                            oclc_error_type = bibs_response.json()["type"]
+                            logger.debug(f'Query failure reason: {oclc_error_type}')
+                except (JSONDecodeError, KeyError):
+                    logger.debug('No OCLC error type given')
                 return None
-
             return bibs_response.json()
         except Exception as e:
-            logger.error(f'Failed to query {bibs_endpoint} with query {query}', e)
+            logger.error(f'Failed to query {bibs_endpoint} with query {query}. Exception: {e}')
             return None
-    
+
     def generate_search_query(self, identifier=None, identifier_type=None, title=None, author=None):
         if identifier and identifier_type:
             return self._generate_identifier_query(identifier, identifier_type)

diff --git a/processes/core.py b/processes/core.py
@@ -2,6 +2,11 @@
 from model import Record
 from static.manager import StaticManager
 
+from logger import createLog
+
+
+logger = createLog(__name__)
+
 
 class CoreProcess(DBManager, NyplApiManager, RabbitMQManager, RedisManager, StaticManager,
                   ElasticsearchManager, S3Manager):
@@ -19,17 +24,17 @@ def addDCDWToUpdateList(self, rec):
         existing = self.session.query(Record)\
             .filter(Record.source_id == rec.record.source_id).first()
         if existing:
-            print('EXISTING', existing)
+            logger.debug('Existing record: ' + str(existing))
             rec.updateExisting(existing)
 
             try:
                 self.records.remove(existing)
             except KeyError:
-                print('Record not in current set')
+                logger.debug('Record not in current set')
 
             self.records.add(existing)
         else:
-            print('NEW', rec.record)
+            logger.debug('New record: ' + str(rec.record))
             self.records.add(rec.record)
 
         if len(self.records) >= self.batchSize:

diff --git a/processes/oclcClassify.py b/processes/oclcClassify.py
@@ -4,6 +4,7 @@
 
 from .core import CoreProcess
 from managers import ClassifyManager, OCLCCatalogManager
+from managers.oclc_catalog import OCLCCatalogError
 from managers.oclcClassify import ClassifyError
 from mappings.oclcClassify import ClassifyMapping
 from mappings.oclc_bib import OCLCBibMapping
@@ -114,7 +115,7 @@ def frbrizeRecord(self, record):
 
             try:
                 self.classify_record_by_metadata_v2(identifier, idenType, author, record.title)
-            except ClassifyError as err:
+            except (ClassifyError, OCLCCatalogError) as err:
                 logger.warning('Unable to Classify {}'.format(record))
                 logger.debug(err.message)