Skip to content

Commit

Permalink
Merge pull request #586 from awslabs/batch-s3
Browse files Browse the repository at this point in the history
feat(be): Batch on s3 processing
  • Loading branch information
nowfox committed Mar 26, 2024
2 parents 911b28e + 2588413 commit 6a5cf3e
Show file tree
Hide file tree
Showing 19 changed files with 9,482 additions and 154 deletions.
43 changes: 23 additions & 20 deletions source/containers/document-pii-detection/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
# NOTE(review): the hunk header above reports 23 additions and 20 deletions,
# but this capture carries no +/- markers, so superseded and replacement
# instructions appear interleaved below. The span is not buildable as written.
# The "presumably removed side" tags are inferred from the base image, Python
# version and package manager each line refers to — confirm against the commit.

# presumably removed side: the previous single-stage base image — confirm
FROM public.ecr.aws/lambda/python:3.9
# temp stage
# Stage named "builder"; the pip wheel step further down writes its output to
# /app/wheels inside this stage.
FROM python:3.12-slim-bullseye as builder

# presumably removed side: installs requirements with python3.9, which matches
# the python:3.9 base above rather than the 3.12 images — confirm
ARG FUNCTION_DIR="/opt/ml/code"
COPY requirements.txt ${FUNCTION_DIR}/requirements.txt
RUN python3.9 -m pip install -r ${FUNCTION_DIR}/requirements.txt
# Install build dependencies
# Compilers only; presumably needed for requirements that ship no prebuilt
# wheel for this platform — verify.
RUN apt-get update && \
apt-get install -y --no-install-recommends gcc g++

# presumably removed side: the same sources are copied again near the end of
# this capture — confirm
COPY main.py parser_factory.py ${FUNCTION_DIR}/
COPY parsers/ ${FUNCTION_DIR}/parsers/
# Install Python dependencies first
# Builds wheels into /app/wheels for the listed requirements only: --no-deps
# means wheels for their transitive dependencies are not built here.
COPY requirements.txt .
RUN pip wheel --no-cache-dir --no-deps --wheel-dir /app/wheels -r requirements.txt

# presumably removed side: base URLs and versions used by the wget steps
# below — confirm
ARG OCR_MODEL_URL="https://aws-gcr-solutions-assets.s3.cn-northwest-1.amazonaws.com.cn/ai-solution-kit/infer-ocr-model/standard"
ARG OCR_MODEL_VERSION="v1.0.0"
ARG FD_MODEL_URL="https://aws-gcr-solutions-assets.s3.cn-northwest-1.amazonaws.com.cn/ai-solution-kit/face-detection"
ARG FD_MODEL_VERSION="1.2.0"
# prod stage
# A new FROM starts a fresh stage; nothing from "builder" is present here
# unless it is explicitly copied or mounted from that stage.
FROM python:3.12-slim-bullseye

# presumably removed side: yum fits the Lambda base image at the top, not the
# Debian-based (bullseye) slim image — confirm. Each step fetches one model
# file at build time into ocr_model/ or fd_model/ under FUNCTION_DIR.
RUN yum install -y wget
RUN mkdir -p ${FUNCTION_DIR}/ocr_model
RUN wget -c $OCR_MODEL_URL/$OCR_MODEL_VERSION/classifier.onnx -O ${FUNCTION_DIR}/ocr_model/classifier.onnx
RUN wget -c $OCR_MODEL_URL/$OCR_MODEL_VERSION/det_standard.onnx -O ${FUNCTION_DIR}/ocr_model/det_standard.onnx
RUN wget -c $OCR_MODEL_URL/$OCR_MODEL_VERSION/keys_v1.txt -O ${FUNCTION_DIR}/ocr_model/keys_v1.txt
RUN wget -c $OCR_MODEL_URL/$OCR_MODEL_VERSION/rec_standard.onnx -O ${FUNCTION_DIR}/ocr_model/rec_standard.onnx
RUN mkdir -p ${FUNCTION_DIR}/fd_model
RUN wget -c ${FD_MODEL_URL}/${FD_MODEL_VERSION}/det.onnx -O ${FUNCTION_DIR}/fd_model/det.onnx
# ARG values are scoped to the stage that declares them, so FUNCTION_DIR is
# declared here, after the prod-stage FROM, for the instructions that follow.
ARG FUNCTION_DIR="/opt/ml/code"

# Later relative paths (including the script named in the final command)
# resolve against this directory.
WORKDIR ${FUNCTION_DIR}

# Command can be overwritten by providing a different command in the template directly.
# presumably removed side: with the interpreter as the entrypoint, the script
# to run has to be supplied as the container command — confirm
ENTRYPOINT ["python"]
# Runtime OS dependency. libmagic1 is presumably needed by a file-type
# detection package listed in requirements.txt — verify. The apt package lists
# are deleted in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libmagic1 \
    && rm -rf /var/lib/apt/lists/*

# Model assets come from the build context.
COPY ocr_model/ ${FUNCTION_DIR}/ocr_model/
COPY fd_model/ ${FUNCTION_DIR}/fd_model/

# Install the wheels produced by the "builder" stage. They are bind-mounted for
# the duration of this step instead of being COPY'd, because a COPY layer would
# keep every .whl file in the final image even after installation.
# Requires BuildKit (RUN --mount).
RUN --mount=type=bind,from=builder,source=/app/wheels,target=/tmp/wheels \
    pip install --no-cache-dir /tmp/wheels/*

# Application source goes last so that code edits do not invalidate the
# dependency layers above.
COPY parsers/ ${FUNCTION_DIR}/parsers/
COPY requirements.txt main.py parser_factory.py ${FUNCTION_DIR}/

# NOTE(review): no USER is set, so the container runs as root. Consider a
# dedicated non-root user once the paths written at runtime are confirmed.

# Default command; main.py is resolved relative to WORKDIR (${FUNCTION_DIR}).
CMD ["python3", "main.py"]
Binary file not shown.
Loading

0 comments on commit 6a5cf3e

Please sign in to comment.