Skip to content

Commit 772950e

Browse files
Improve monitoring stability
1 parent ed96fe5 commit 772950e

File tree

1 file changed

+24
-20
lines changed
  • archive_query_log/monitoring

1 file changed

+24
-20
lines changed

archive_query_log/monitoring/home.py

Lines changed: 24 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -146,26 +146,30 @@ def _get_warc_cache_statistics(
146146
for file_path in tqdm(
147147
file_paths, desc="Compute WARC cache statistics", unit="file"
148148
):
149-
disk_size_bytes += file_path.stat().st_size
150-
last_modified = max(
151-
last_modified,
152-
file_path.stat().st_mtime,
153-
)
154-
# FIXME: Counting WARC records takes too long at the moment due to the large number of files. Replace this again with record counting once the number of files is reduced.
155-
# try:
156-
# with gzip_open(file_path, mode="rb") as gzip_file:
157-
# iterator = ArchiveIterator(
158-
# fileobj=gzip_file,
159-
# no_record_parse=True,
160-
# )
161-
# warc_count += sum(
162-
# 1 for record in iterator if record.rec_type == "request"
163-
# )
164-
# except BadGzipFile:
165-
# warn(f"Invalid gzip file: {file_path}")
166-
# # Ignore invalid gzip files.
167-
# pass
168-
warc_count += 1
149+
try:
150+
disk_size_bytes += file_path.stat().st_size
151+
last_modified = max(
152+
last_modified,
153+
file_path.stat().st_mtime,
154+
)
155+
# FIXME: Counting WARC records takes too long at the moment due to the large number of files. Replace this again with record counting once the number of files is reduced.
156+
# try:
157+
# with gzip_open(file_path, mode="rb") as gzip_file:
158+
# iterator = ArchiveIterator(
159+
# fileobj=gzip_file,
160+
# no_record_parse=True,
161+
# )
162+
# warc_count += sum(
163+
# 1 for record in iterator if record.rec_type == "request"
164+
# )
165+
# except BadGzipFile:
166+
# warn(f"Invalid gzip file: {file_path}")
167+
# # Ignore invalid gzip files.
168+
# pass
169+
warc_count += 1
170+
except FileNotFoundError:
171+
# Ignore files that have been deleted while processing.
172+
pass
169173

170174
statistics = Statistics(
171175
name=name,

0 commit comments

Comments
 (0)