Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions deploy/genai_vllm_server_docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@ ENV PIP_NO_CACHE_DIR=0
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

RUN python -m pip install 'paddlex>=3.3.5,<3.4'
# Version specifier appended directly to the package name in the pip
# requirement below. build.sh overrides it via --build-arg PADDLEX_VERSION
# (for example "==<version>" when build.sh is called with --pdx-version).
ARG PADDLEX_VERSION=">=3.3.6,<3.4"
RUN python -m pip install "paddlex${PADDLEX_VERSION}"

# Selects which prebuilt flash-attn wheel is installed below. build.sh passes
# BUILD_FOR_SM120=true when it is invoked with --sm120.
ARG BUILD_FOR_SM120=false
# Install the prebuilt flash-attn wheel (2.8.3 when BUILD_FOR_SM120 is "true",
# 2.8.2 otherwise), then install the genai-vllm-server plugin.
# Each branch ends with ';' and the conditional is closed with 'fi' so that the
# shell parses a complete if/else before the trailing '&&'.
RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
    else \
        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
    fi \
    && paddlex --install genai-vllm-server

EXPOSE 8080
Expand Down
32 changes: 30 additions & 2 deletions deploy/genai_vllm_server_docker/build.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,37 @@
#!/usr/bin/env bash
#
# Build the PaddleX GenAI vLLM server image from the Dockerfile in the
# current directory.
#
# Usage:
#   ./build.sh [--pdx-version VERSION] [--sm120] [--tag-suffix SUFFIX]
#
# Options:
#   --pdx-version VERSION  Pin paddlex to exactly VERSION (sent to the
#                          Dockerfile as "==VERSION").
#                          Default constraint: ">=3.3.6,<3.4".
#   --sm120                Set the BUILD_FOR_SM120 build arg to "true".
#   --tag-suffix SUFFIX    Image tag to apply (default: "latest").
#
# Optional environment, forwarded as build args (empty when unset):
#   http_proxy, https_proxy, no_proxy, PIP_INDEX_URL

set -euo pipefail

readonly image_repo='ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server'

paddlex_version='>=3.3.6,<3.4'
build_for_sm120='false'
tag_suffix='latest'

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --pdx-version)
      # A trailing "--pdx-version" would otherwise produce "paddlex==".
      [[ $# -ge 2 && -n "$2" ]] || die "Option $1 requires a value"
      paddlex_version="==$2"
      shift 2
      ;;
    --sm120)
      build_for_sm120='true'
      shift
      ;;
    --tag-suffix)
      # An empty suffix would produce an invalid image reference ("repo:").
      [[ $# -ge 2 && -n "$2" ]] || die "Option $1 requires a value"
      tag_suffix="$2"
      shift 2
      ;;
    *)
      die "Unknown option: $1"
      ;;
  esac
done

# The proxy / index variables are optional; ${var:-} keeps them empty instead
# of tripping `set -u` when they are not exported by the caller.
docker build \
  -t "${image_repo}:${tag_suffix}" \
  --build-arg PADDLEX_VERSION="${paddlex_version}" \
  --build-arg BUILD_FOR_SM120="${build_for_sm120}" \
  --build-arg http_proxy="${http_proxy:-}" \
  --build-arg https_proxy="${https_proxy:-}" \
  --build-arg no_proxy="${no_proxy:-}" \
  --build-arg PIP_INDEX_URL="${PIP_INDEX_URL:-}" \
  .
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ SubModules:
module_name: vl_recognition
model_name: PaddleOCR-VL-0.9B
model_dir: null
batch_size: 2048
batch_size: 4096
genai_config:
backend: native

Expand Down
2 changes: 1 addition & 1 deletion deploy/hps/sdk/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"PP-ChatOCRv3-doc": "0.3.1",
"PP-ChatOCRv4-doc": "0.4.1",
"PP-DocTranslation": "0.1.1",
"PaddleOCR-VL": "0.1.0",
"PaddleOCR-VL": "0.1.1",
"PP-ShiTuV2": "0.1.0",
"rotated_object_detection": "0.1.0",
"seal_recognition": "0.2.1",
Expand Down
2 changes: 1 addition & 1 deletion paddlex/configs/pipelines/PaddleOCR-VL.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ SubModules:
module_name: vl_recognition
model_name: PaddleOCR-VL-0.9B
model_dir: null
batch_size: 2048
batch_size: 4096
genai_config:
backend: native

Expand Down