From 9b2ca6d10d589e1b9ad1fd826554330af9086aad Mon Sep 17 00:00:00 2001 From: Ryan Marten Date: Mon, 16 Dec 2024 19:52:59 -0800 Subject: [PATCH 1/4] change logging for cache verification --- .../base_request_processor.py | 35 +++++++++++-------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/bespokelabs/curator/request_processor/base_request_processor.py b/src/bespokelabs/curator/request_processor/base_request_processor.py index 08743532..539895d0 100644 --- a/src/bespokelabs/curator/request_processor/base_request_processor.py +++ b/src/bespokelabs/curator/request_processor/base_request_processor.py @@ -102,27 +102,32 @@ def _verify_existing_request_files( for i in range(expected_num_files): req_f = os.path.join(working_dir, f"requests_{i}.jsonl") meta_f = os.path.join(working_dir, f"metadata_{i}.json") - if not os.path.exists(req_f) or not os.path.exists(meta_f): + + if not os.path.exists(req_f): incomplete_files.append(i) - else: - with open(req_f, "r") as f: - data = f.read() - num_jobs = len(data.splitlines()) + continue - with open(meta_f, "r") as f: - metadata = json.load(f) - - expected_num_jobs = metadata["num_jobs"] - if num_jobs != expected_num_jobs: - incomplete_files.append(i) + if not os.path.exists(meta_f): + logger.debug(f"Cache missing metadata file {meta_f} for request file {req_f}") + incomplete_files.append(i) + continue + + with open(req_f, "r") as f: + data = f.read() + num_jobs = len(data.splitlines()) + + with open(meta_f, "r") as f: + metadata = json.load(f) + + expected_num_jobs = metadata["num_jobs"] + if num_jobs != expected_num_jobs: + logger.debug(f"Request file {req_f} has {num_jobs} jobs, but metadata file {meta_f} has {expected_num_jobs} jobs") + incomplete_files.append(i) - logger.info( - f"Cache missing {len(incomplete_files)} complete request files - regenerating missing ones." - ) return incomplete_files except: - logger.info( + logger.debug( "Cache verification failed for unexpected reasons - regenerating all request files." ) incomplete_files = list(range(expected_num_files)) From b4a5d4f4d90e06c518f419ae095aa465d59d487b Mon Sep 17 00:00:00 2001 From: Ryan Marten Date: Mon, 16 Dec 2024 19:53:57 -0800 Subject: [PATCH 2/4] linting --- .../curator/request_processor/base_request_processor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/bespokelabs/curator/request_processor/base_request_processor.py b/src/bespokelabs/curator/request_processor/base_request_processor.py index 539895d0..c50c5d61 100644 --- a/src/bespokelabs/curator/request_processor/base_request_processor.py +++ b/src/bespokelabs/curator/request_processor/base_request_processor.py @@ -102,7 +102,7 @@ def _verify_existing_request_files( for i in range(expected_num_files): req_f = os.path.join(working_dir, f"requests_{i}.jsonl") meta_f = os.path.join(working_dir, f"metadata_{i}.json") - + if not os.path.exists(req_f): incomplete_files.append(i) continue @@ -111,7 +111,7 @@ def _verify_existing_request_files( logger.debug(f"Cache missing metadata file {meta_f} for request file {req_f}") incomplete_files.append(i) continue - + with open(req_f, "r") as f: data = f.read() num_jobs = len(data.splitlines()) @@ -121,7 +121,9 @@ def _verify_existing_request_files( expected_num_jobs = metadata["num_jobs"] if num_jobs != expected_num_jobs: - logger.debug(f"Request file {req_f} has {num_jobs} jobs, but metadata file {meta_f} has {expected_num_jobs} jobs") + logger.debug( + f"Request file {req_f} has {num_jobs} jobs, but metadata file {meta_f} has {expected_num_jobs} jobs" + ) incomplete_files.append(i) return incomplete_files From 84d5811ebdf53821230170cc908496b9fbe49882 Mon Sep 17 00:00:00 2001 From: Ryan Marten Date: Mon, 16 Dec 2024 19:55:46 -0800 Subject: [PATCH 3/4] change to warning --- .../curator/request_processor/base_request_processor.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/bespokelabs/curator/request_processor/base_request_processor.py b/src/bespokelabs/curator/request_processor/base_request_processor.py index c50c5d61..eb7f8302 100644 --- a/src/bespokelabs/curator/request_processor/base_request_processor.py +++ b/src/bespokelabs/curator/request_processor/base_request_processor.py @@ -108,7 +108,7 @@ def _verify_existing_request_files( continue if not os.path.exists(meta_f): - logger.debug(f"Cache missing metadata file {meta_f} for request file {req_f}") + logger.warning(f"Cache missing metadata file {meta_f} for request file {req_f}") incomplete_files.append(i) continue @@ -121,7 +121,7 @@ def _verify_existing_request_files( expected_num_jobs = metadata["num_jobs"] if num_jobs != expected_num_jobs: - logger.debug( + logger.warning( f"Request file {req_f} has {num_jobs} jobs, but metadata file {meta_f} has {expected_num_jobs} jobs" ) incomplete_files.append(i) @@ -129,9 +129,7 @@ def _verify_existing_request_files( return incomplete_files except: - logger.debug( - "Cache verification failed for unexpected reasons - regenerating all request files." - ) + logger.warning("Cache verification failed due to {e} - regenerating all request files.") incomplete_files = list(range(expected_num_files)) return incomplete_files From 349093e20e52c5e619b34745c497f752c28e1f19 Mon Sep 17 00:00:00 2001 From: Ryan Marten Date: Mon, 16 Dec 2024 19:56:29 -0800 Subject: [PATCH 4/4] fix exception logging --- .../curator/request_processor/base_request_processor.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/bespokelabs/curator/request_processor/base_request_processor.py b/src/bespokelabs/curator/request_processor/base_request_processor.py index eb7f8302..ca1b5a81 100644 --- a/src/bespokelabs/curator/request_processor/base_request_processor.py +++ b/src/bespokelabs/curator/request_processor/base_request_processor.py @@ -128,8 +128,10 @@ def _verify_existing_request_files( return incomplete_files - except: - logger.warning("Cache verification failed due to {e} - regenerating all request files.") + except Exception as e: + logger.warning( + f"Cache verification failed due to {e} - regenerating all request files." + ) incomplete_files = list(range(expected_num_files)) return incomplete_files