diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 4aabe2661088..1b6bdabc7a53 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -65,7 +65,6 @@ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/tests /tests COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/examples /examples COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/docker/Dockerfile.rocm /docker/ COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/.buildkite /.buildkite -# Centralized v1 package - copied to both test and final stages COPY --from=build_vllm ${COMMON_WORKDIR}/vllm/vllm/v1 /vllm_v1 # ----------------------- @@ -98,7 +97,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ uv pip install --system hf_transfer ENV HF_HUB_ENABLE_HF_TRANSFER=1 -# Copy in the v1 package +# Copy in the v1 package (for python-only install test group) COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 # Source code is used in the `python_only_compile.sh` test @@ -130,9 +129,6 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ && pip uninstall -y vllm \ && uv pip install --system *.whl -# Copy in the v1 package -COPY --from=export_vllm /vllm_v1 /usr/local/lib/python${PYTHON_VERSION}/dist-packages/vllm/v1 - ARG COMMON_WORKDIR # Copy over the benchmark scripts as well diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index ae61d4c6c6a8..394728b67eaa 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -70,8 +70,8 @@ torchgeo==0.7.0 mteb==2.1.2 # Data processing -xgrammar @ git+https://github.com/mlc-ai/xgrammar.git@eafd4db51b78acc64b3f0764ef27dfd206c28628 - # Test async scheduling +xgrammar==0.1.27 +# Test async scheduling # Utilities num2words==0.5.14 diff --git a/tests/models/multimodal/generation/conftest.py b/tests/models/multimodal/generation/conftest.py new file mode 100644 index 000000000000..ee3ecdb10fdb --- /dev/null +++ b/tests/models/multimodal/generation/conftest.py @@ -0,0 +1,19 @@ +# 
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Pytest configuration for the vLLM multimodal generation tests."""

import torch

from vllm.platforms import current_platform


def pytest_configure(config):
    """Pytest hook: on ROCm, restrict SDP attention to the math backend.

    When the current platform reports ROCm, the flash and memory-efficient
    scaled-dot-product backends are switched off and the math backend is
    switched on, to avoid HF Transformers accuracy issues. On every other
    platform nothing is changed.

    Args:
        config: The pytest config object passed to the hook (unused here).
    """
    if current_platform.is_rocm():
        # Global torch backend toggles; they stay in effect for the session.
        sdp_backends = torch.backends.cuda
        sdp_backends.enable_flash_sdp(False)
        sdp_backends.enable_mem_efficient_sdp(False)
        sdp_backends.enable_math_sdp(True)