Skip to content

Commit 8b1fa9a

Browse files
committed
feat(core): add a retry mechanism for health checks
1 parent 1b0ea7e commit 8b1fa9a

File tree

1 file changed

+28
-13
lines changed
  • src/server/core/acontext_core/infra

1 file changed

+28
-13
lines changed

src/server/core/acontext_core/infra/s3.py

Lines changed: 28 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -345,24 +345,39 @@ async def get_object_metadata(
345345
except Exception as e:
346346
_handle_unexpected_error(e, bucket_name, key)
347347

348-
async def health_check(self) -> bool:
348+
async def health_check(self, max_retries: int = 5, retry_delay: float = 2.0) -> bool:
349349
"""
350-
Perform health check with bucket HEAD operation.
350+
Perform health check with bucket HEAD operation with retry logic.
351+
352+
Args:
353+
max_retries: Maximum number of retry attempts (default: 5)
354+
retry_delay: Delay in seconds between retries (default: 2.0)
351355
352356
Returns:
353357
bool: True if S3 is accessible, False otherwise
354358
"""
355-
try:
356-
async with self.get_client() as client:
357-
await client.head_bucket(Bucket=self.bucket)
358-
logger.debug(f"S3 health check passed - bucket: {self.bucket}")
359-
return True
360-
361-
except (ClientError, NoCredentialsError, Exception) as e:
362-
logger.error(
363-
f"S3 health check failed - bucket: {self.bucket}, error: {str(e)}"
364-
)
365-
return False
359+
for attempt in range(1, max_retries + 1):
360+
try:
361+
async with self.get_client() as client:
362+
await client.head_bucket(Bucket=self.bucket)
363+
logger.info(f"S3 health check passed - bucket: {self.bucket}")
364+
return True
365+
366+
except (ClientError, NoCredentialsError, Exception) as e:
367+
if attempt < max_retries:
368+
logger.warning(
369+
f"S3 health check failed (attempt {attempt}/{max_retries}) - "
370+
f"bucket: {self.bucket}, error: {str(e)}, retrying in {retry_delay}s..."
371+
)
372+
await asyncio.sleep(retry_delay)
373+
else:
374+
logger.error(
375+
f"S3 health check failed after {max_retries} attempts - "
376+
f"bucket: {self.bucket}, error: {str(e)}"
377+
)
378+
return False
379+
380+
return False
366381

367382
def get_connection_status(self) -> Dict[str, Any]:
368383
"""

0 commit comments

Comments
 (0)