diff --git a/deploy/genai_vllm_server_docker/Dockerfile b/deploy/genai_vllm_server_docker/Dockerfile
index 8dfa80d71..efad42ebe 100644
--- a/deploy/genai_vllm_server_docker/Dockerfile
+++ b/deploy/genai_vllm_server_docker/Dockerfile
@@ -8,9 +8,17 @@ ENV PIP_NO_CACHE_DIR=0
 ENV PYTHONUNBUFFERED=1
 ENV PYTHONDONTWRITEBYTECODE=1
 
-RUN python -m pip install 'paddlex>=3.3.5,<3.4'
+# Version specifier appended to the "paddlex" requirement; override with --build-arg.
+ARG PADDLEX_VERSION=">=3.3.6,<3.4"
+RUN python -m pip install "paddlex${PADDLEX_VERSION}"
 
-RUN python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl \
+# Set BUILD_FOR_SM120=true to install the flash-attn 2.8.3 wheel instead of the 2.8.2 one.
+ARG BUILD_FOR_SM120=false
+RUN if [ "${BUILD_FOR_SM120}" = 'true' ]; then \
+        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.3+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+    else \
+        python -m pip install https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/v0.3.14/flash_attn-2.8.2+cu128torch2.8-cp310-cp310-linux_x86_64.whl; \
+    fi \
     && paddlex --install genai-vllm-server
 
 EXPOSE 8080
diff --git a/deploy/genai_vllm_server_docker/build.sh b/deploy/genai_vllm_server_docker/build.sh
index 27a82b359..74019f5c1 100755
--- a/deploy/genai_vllm_server_docker/build.sh
+++ b/deploy/genai_vllm_server_docker/build.sh
@@ -1,9 +1,52 @@
 #!/usr/bin/env bash
 
+# Build the PaddleX GenAI vLLM server Docker image.
+#
+# Options:
+#   --pdx-version VERSION  Pin PaddleX to exactly VERSION (passed to pip as "==VERSION").
+#   --sm120                Pass BUILD_FOR_SM120=true to the Docker build.
+#   --tag-suffix SUFFIX    Image tag to use (default: latest).
+
+paddlex_version='>=3.3.6,<3.4'
+build_for_sm120='false'
+tag_suffix='latest'
+
+# Abort unless the option in $1 is followed by a non-empty value in $2.
+require_value() {
+    if [[ $# -lt 2 || -z "$2" ]]; then
+        echo "Option $1 requires a value" >&2
+        exit 1
+    fi
+}
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --pdx-version)
+            require_value "$@"
+            paddlex_version="==$2"
+            shift 2
+            ;;
+        --sm120)
+            build_for_sm120='true'
+            shift
+            ;;
+        --tag-suffix)
+            require_value "$@"
+            tag_suffix="$2"
+            shift 2
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            exit 1
+            ;;
+    esac
+done
+
 docker build \
-    -t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${1:latest}" \
+    -t "ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/paddlex-genai-vllm-server:${tag_suffix}" \
+    --build-arg PADDLEX_VERSION="${paddlex_version}" \
+    --build-arg BUILD_FOR_SM120="${build_for_sm120}" \
     --build-arg http_proxy="${http_proxy}" \
     --build-arg https_proxy="${https_proxy}" \
     --build-arg no_proxy="${no_proxy}" \
-    --build-arg PIP_INDEX_URL="${PIP_INDEX_URL}" \
     .
diff --git a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
index 43335a9de..280a45460 100644
--- a/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
+++ b/deploy/hps/sdk/pipelines/PaddleOCR-VL/server/pipeline_config.yaml
@@ -74,7 +74,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-0.9B
     model_dir: null
-    batch_size: 2048
+    batch_size: 4096
     genai_config:
       backend: native
 
diff --git a/deploy/hps/sdk/versions.json b/deploy/hps/sdk/versions.json
index 08820eeef..dab7ddec6 100644
--- a/deploy/hps/sdk/versions.json
+++ b/deploy/hps/sdk/versions.json
@@ -20,7 +20,7 @@
     "PP-ChatOCRv3-doc": "0.3.1",
     "PP-ChatOCRv4-doc": "0.4.1",
     "PP-DocTranslation": "0.1.1",
-    "PaddleOCR-VL": "0.1.0",
+    "PaddleOCR-VL": "0.1.1",
     "PP-ShiTuV2": "0.1.0",
     "rotated_object_detection": "0.1.0",
     "seal_recognition": "0.2.1",
diff --git a/paddlex/configs/pipelines/PaddleOCR-VL.yaml b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
index 43335a9de..280a45460 100644
--- a/paddlex/configs/pipelines/PaddleOCR-VL.yaml
+++ b/paddlex/configs/pipelines/PaddleOCR-VL.yaml
@@ -74,7 +74,7 @@ SubModules:
     module_name: vl_recognition
     model_name: PaddleOCR-VL-0.9B
     model_dir: null
-    batch_size: 2048
+    batch_size: 4096
     genai_config:
       backend: native