We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0f75b62, commit 096d87d (Copy full SHA for 096d87d)
fastdeploy/config.py
@@ -1556,6 +1556,10 @@ def postprocess(self):
1556
self.graph_opt_config.use_cudagraph = False
1557
logger.info(f"CUDAGraph only support on GPU, current device type is {self.device_config.device_type}!")
1558
1559
+ if self.model_config.enable_mm and self.graph_opt_config.use_cudagraph:
1560
+ self.cache_config.enable_prefix_caching = False
1561
+ logger.info("Multi-modal models do not support prefix caching when using CUDAGraph!")
1562
+
1563
if self.scheduler_config.splitwise_role == "mixed":
1564
self.model_config.moe_phase = MoEPhase(phase="prefill")
1565
elif self.scheduler_config.splitwise_role == "prefill":
0 commit comments