From 9b7e7289cfc6f5475620c8b88e845e4ed580d8d3 Mon Sep 17 00:00:00 2001
From: Daniel von Atzigen
Date: Wed, 16 Oct 2024 13:21:35 +0200
Subject: [PATCH] Check if file exists before processing

Look up the requested input object in the S3 input bucket before the
background task is scheduled and reject the request with HTTP 422 when
the object is missing.

The AWS client is now created once in `start` and handed to `process`
instead of being created inside `process`. `Client.exists_file` maps a
'404' ClientError to False and re-raises every other ClientError.

`task.run` now records any Exception raised by the target as a failed
result instead of only RuntimeError. The `randint` and `sleep` imports
are dropped from api.py.
---
 api.py        | 17 +++++++++++++----
 aws/aws.py    | 11 +++++++++++
 utils/task.py |  2 +-
 3 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/api.py b/api.py
index 0ac4045..e77233e 100644
--- a/api.py
+++ b/api.py
@@ -2,8 +2,6 @@
 import os
 import shutil
 import uuid
-from random import randint
-from time import sleep
 from typing import Annotated
 
 from fastapi import FastAPI, Depends, status, HTTPException, BackgroundTasks, Response
@@ -41,7 +39,18 @@ def start(
             detail={"message": "input must be a PDF file"}
         )
 
-    task.start(payload.file, background_tasks, lambda: process(payload, settings))
+    aws_client = aws.connect(settings)
+    has_file = aws_client.exists_file(
+        settings.s3_input_bucket,
+        f'{settings.s3_input_folder}{payload.file}',
+    )
+    if not has_file:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail={"message": "file does not exist"}
+        )
+
+    task.start(payload.file, background_tasks, lambda: process(payload, aws_client, settings))
     return Response(status_code=status.HTTP_204_NO_CONTENT)
 
 
@@ -84,6 +93,7 @@ def collect(
 
 def process(
     payload: StartPayload,
+    aws_client: aws.Client,
     settings: Annotated[ApiSettings, Depends(api_settings)],
 ):
     if settings.skip_processing:
@@ -98,7 +108,6 @@ def process(
     input_path = os.path.join(tmp_dir, "input.pdf")
     output_path = os.path.join(tmp_dir, "output.pdf")
 
-    aws_client = aws.connect(settings)
     aws.load_file(
         aws_client.bucket(settings.s3_input_bucket),
         f'{settings.s3_input_folder}{payload.file}',
diff --git a/aws/aws.py b/aws/aws.py
index 415cbd2..6b40ca4 100644
--- a/aws/aws.py
+++ b/aws/aws.py
@@ -1,6 +1,7 @@
 from dataclasses import dataclass
 
 import boto3
+from botocore.exceptions import ClientError
 from mypy_boto3_s3 import S3ServiceResource
 from mypy_boto3_s3.service_resource import Bucket
 from mypy_boto3_textract import TextractClient as Textractor
@@ -18,6 +19,16 @@ class Client:
     def bucket(self, name: str) -> Bucket:
         return self.s3.Bucket(name)
 
+    def exists_file(self, bucket_name: str, key: str) -> bool:
+        try:
+            self.s3.Object(bucket_name, key).load()
+            return True
+        except ClientError as e:
+            if e.response['Error']['Code'] == '404':
+                return False
+            else:
+                raise e
+
 
 def connect(settings: ApiSettings) -> Client:
     has_profile = is_set(settings.aws_profile)
diff --git a/utils/task.py b/utils/task.py
index afa85c6..429f9ad 100644
--- a/utils/task.py
+++ b/utils/task.py
@@ -55,7 +55,7 @@ def run(file: str, target: typing.Callable[[], Result]):
         value = target()
         result = Output(ok=True, value=value)
         logging.info(f"Task for file '{file}' has been completed.")
-    except RuntimeError as e:
+    except Exception as e:
         logging.exception(f"Processing of '{file}' failed")
         result = Output(ok=False, value=e)
 