[V1] Always set enable_chunked_prefill = True for V1
Signed-off-by: Woosuk Kwon <[email protected]>
WoosukKwon committed Dec 10, 2024
1 parent fe2e10c commit 7adf83c
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions vllm/engine/arg_utils.py
@@ -1225,19 +1225,24 @@ def _override_v1_engine_args(self, usage_context: UsageContext) -> None:
         """
         assert envs.VLLM_USE_V1, "V1 is not enabled"
 
+        # V1 always uses chunked prefills.
+        self.enable_chunked_prefill = True
+        # When no user override, set the default values based on the usage
+        # context.
+        # TODO(woosuk): Tune the default values for different hardware.
         if self.max_num_batched_tokens is None:
-            # When no user override, set the default values based on the
-            # usage context.
             if usage_context == UsageContext.LLM_CLASS:
-                logger.warning("Setting max_num_batched_tokens to 8192 "
-                               "for LLM_CLASS usage context.")
-                self.max_num_seqs = 1024
                 self.max_num_batched_tokens = 8192
             elif usage_context == UsageContext.OPENAI_API_SERVER:
-                logger.warning("Setting max_num_batched_tokens to 2048 "
-                               "for OPENAI_API_SERVER usage context.")
-                self.max_num_seqs = 1024
                 self.max_num_batched_tokens = 2048
+            logger.warning(
+                "Setting max_num_batched_tokens to %d for %s usage context.",
+                self.max_num_batched_tokens, usage_context.value)
+        # NOTE(woosuk): Increase max_num_seqs since the default value (256) is
+        # too small to achieve the best performance in V1.
+        self.max_num_seqs = 1024
+        logger.warning("Setting max_num_seqs to %d for %s usage context.",
+                       self.max_num_seqs, usage_context.value)
 
     def _override_v1_engine_config(self, engine_config: VllmConfig) -> None:
         """
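For context, the sketch below mirrors the behavior this commit leaves in _override_v1_engine_args: chunked prefill is forced on for V1, max_num_batched_tokens only receives a usage-context default when the user has not set it, and max_num_seqs is raised from the old default of 256 to 1024. This is a minimal, self-contained illustration; the V1Defaults dataclass, apply_v1_overrides function, and the two-member UsageContext enum are hypothetical stand-ins, not vLLM's actual API.

# Hypothetical sketch of the post-commit override logic; names are
# illustrative stand-ins, not vLLM's real classes or functions.
import enum
import logging
from dataclasses import dataclass
from typing import Optional

logger = logging.getLogger(__name__)


class UsageContext(enum.Enum):
    LLM_CLASS = "LLM_CLASS"
    OPENAI_API_SERVER = "OPENAI_API_SERVER"


@dataclass
class V1Defaults:
    enable_chunked_prefill: bool = False
    max_num_batched_tokens: Optional[int] = None
    max_num_seqs: int = 256  # old default that the commit deems too small for V1


def apply_v1_overrides(args: V1Defaults, usage_context: UsageContext) -> V1Defaults:
    # V1 always uses chunked prefills, regardless of what the user passed.
    args.enable_chunked_prefill = True
    # Fill in max_num_batched_tokens only when the user did not override it.
    if args.max_num_batched_tokens is None:
        if usage_context == UsageContext.LLM_CLASS:
            args.max_num_batched_tokens = 8192
        elif usage_context == UsageContext.OPENAI_API_SERVER:
            args.max_num_batched_tokens = 2048
        logger.warning(
            "Setting max_num_batched_tokens to %d for %s usage context.",
            args.max_num_batched_tokens, usage_context.value)
    # Raise max_num_seqs from the old default of 256 for better V1 throughput.
    args.max_num_seqs = 1024
    return args


if __name__ == "__main__":
    print(apply_v1_overrides(V1Defaults(), UsageContext.OPENAI_API_SERVER))
    # V1Defaults(enable_chunked_prefill=True, max_num_batched_tokens=2048,
    #            max_num_seqs=1024)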
