skip v1 tests on non-cuda
robertgshaw2-neuralmagic committed Nov 6, 2024
1 parent 9350d5a · commit 7d3c114
Showing 3 changed files with 16 additions and 0 deletions.
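All three files guard the module with the same pattern: pytest.skip(..., allow_module_level=True) executed at import time raises a special exception that pytest reports as "whole module skipped," so none of the V1 tests are collected or run on non-CUDA platforms. A minimal self-contained sketch of the pattern (torch.cuda.is_available() stands in here for vLLM's current_platform.is_cuda(); the example test is illustrative only):

# sketch of the module-level skip pattern, not part of this commit
import pytest
import torch

# Executed at import time: pytest.skip raises immediately, and
# allow_module_level=True tells pytest to mark the whole module as
# skipped rather than treating the raise as a collection error.
if not torch.cuda.is_available():
    pytest.skip(reason="requires a CUDA device",
                allow_module_level=True)


def test_gpu_visible():
    # Only reached on CUDA hosts, thanks to the guard above.
    assert torch.cuda.device_count() > 0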
5 changes: 5 additions & 0 deletions tests/engine/v1/test_async_llm.py
@@ -5,8 +5,13 @@

from vllm import SamplingParams
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.platforms import current_platform
from vllm.v1.engine.async_llm import AsyncLLM

if not current_platform.is_cuda():
    pytest.skip(reason="V1 currently only supported on CUDA.",
                allow_module_level=True)

ENGINE_ARGS = AsyncEngineArgs(model="meta-llama/Llama-3.2-1B",
                              disable_log_requests=False)
6 changes: 6 additions & 0 deletions tests/engine/v1/test_engine_core.py
@@ -1,15 +1,21 @@
import time
import uuid

import pytest
from transformers import AutoTokenizer

from vllm import SamplingParams
from vllm.engine.arg_utils import EngineArgs
from vllm.platforms import current_platform
from vllm.usage.usage_lib import UsageContext
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.async_llm import AsyncLLM
from vllm.v1.engine.core import EngineCore

if not current_platform.is_cuda():
    pytest.skip(reason="V1 currently only supported on CUDA.",
                allow_module_level=True)

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
PROMPT = "Hello my name is Robert and I love quantization kernels"
5 changes: 5 additions & 0 deletions tests/engine/v1/test_engine_core_client.py
@@ -8,11 +8,16 @@

from vllm import SamplingParams
from vllm.engine.arg_utils import EngineArgs
from vllm.platforms import current_platform
from vllm.usage.usage_lib import UsageContext
from vllm.v1.engine import EngineCoreRequest
from vllm.v1.engine.async_llm import AsyncLLM
from vllm.v1.engine.core_client import EngineCoreClient

if not current_platform.is_cuda():
    pytest.skip(reason="V1 currently only supported on CUDA.",
                allow_module_level=True)

MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
TOKENIZER = AutoTokenizer.from_pretrained(MODEL_NAME)
PROMPT = "Hello my name is Robert and I love quantization kernels"