diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile
index 8dfa80d71..efad42ebe 100644
--- a/deploy/genai_vllm_server_docker/Dockerfile
+++ b/deploy/genai_vllm_server_docker/Dockerfile
@@ -8,9 +8,16 @@ ENV PIP_NO_CACHE_DIR=0
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONDONTWRITEBYTECODE=1
 
-RUN python -m pip install 'paddlex>=3.3.5,<3.4'
+ARG PADDLEX_VERSION=">=3.3.6,<3.4"
+RUN python -m pip install "paddlex${PADDLEX_VERSION}"
 
-RUN python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
+# Pick the flash-attn wheel (2.8.3 when BUILD_FOR_SM120=true, otherwise 2.8.2); each branch ends with ';' and the conditional is closed with 'fi' so the shell can parse it.
+ARG BUILD_FOR_SM120=false
+RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
+        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+    else \
+        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+    fi \
     && paddlex --install genai-vllm-server
 
 EXPOSE 8080
diff --git a/deploy/genai_vllm_server_docker/build.sh b/deploy/genai_vllm_server_docker/build.sh
index 27a82b359..74019f5c1 100755
--- a/deploy/genai_vllm_server_docker/build.sh
+++ b/deploy/genai_vllm_server_docker/build.sh
@@ -1,9 +1,38 @@
 #!/usr/bin/env bash
 
+# Usage: build.sh [--pdx-version VERSION] [--sm120] [--tag-suffix SUFFIX]
+paddlex_version='>=3.3.6,<3.4'
+build_for_sm120='false'
+tag_suffix='latest'
+
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --pdx-version)
+            paddlex_version="==$2"
+            shift
+            shift
+            ;;
+        --sm120)
+            build_for_sm120='true'
+            shift
+            ;;
+        --tag-suffix)
+            tag_suffix="$2"
+            shift
+            shift
+            ;;
+        *)
+            echo "Unknown option: $1"
+            exit 1
+            ;;
+    esac
+done
+
 docker build \
-    -t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${1:latest}" \
+    -t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${tag_suffix}" \
+    --build-arg PADDLEX_VERSION="${paddlex_version}" \
+    --build-arg BUILD_FOR_SM120="${build_for_sm120}" \
     --build-arg http_proxy="${http_proxy}" \
     --build-arg https_proxy="${https_proxy}" \
     --build-arg no_proxy="${no_proxy}" \
-    --build-arg PIP_INDEX_URL="${PIP_INDEX_URL}" \
     .
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
index 43335a9de..280a45460 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
@@ -74,7 +74,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-0.9B
     model_dir: null
-    batch_size: 2048
+    batch_size: 4096
     genai_config:
       backend: native
 
diff --git a/deploy/hps/sdk/versions.json b/deploy/hps/sdk/versions.json
index 08820eeef..dab7ddec6 100644
--- a/deploy/hps/sdk/versions.json
+++ b/deploy/hps/sdk/versions.json
@@ -20,7 +20,7 @@
   "PP-ChatOCRv3-doc": "0.3.1",
   "PP-ChatOCRv4-doc": "0.4.1",
   "PP-DocTranslation": "0.1.1",
-  "PaddleOCR-VL": "0.1.0",
+  "PaddleOCR-VL": "0.1.1",
   "PP-ShiTuV2": "0.1.0",
   "rotated_object_detection": "0.1.0",
   "seal_recognition": "0.2.1",
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
index 43335a9de..280a45460 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
@@ -74,7 +74,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-0.9B
     model_dir: null
-    batch_size: 2048
+    batch_size: 4096
     genai_config:
       backend: native
 