We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4a8a567, commit 1974880 (Copy full SHA for 1974880)
vllm/v1/worker/gpu_model_runner.py
@@ -3482,7 +3482,10 @@ def _dummy_run(
3482
3483
if self.speculative_config and self.speculative_config.use_eagle():
3484
assert isinstance(self.drafter, EagleProposer)
3485
- use_cudagraphs = cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
+ use_cudagraphs = (
3486
+ cudagraph_runtime_mode == CUDAGraphMode.PIECEWISE
3487
+ and not self.speculative_config.enforce_eager
3488
+ )
3489
self.drafter.dummy_run(num_tokens, use_cudagraphs=use_cudagraphs)
3490
3491
# This is necessary to avoid blocking DP.
0 commit comments