[XPU] XPU currently disables prefix cache for VL models (#4694)

ddchenhao66 · web-flow · commit 2e7b7a42c2c3 · 2025-10-31T10:37:41.000+08:00
Co-authored-by: ddchenhao66 <dhaochen163.com>
diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py
@@ -1078,6 +1078,10 @@ def create_engine_config(self, port_availability_check=True) -> FDConfig:
         all_dict = asdict(self)
         model_cfg = ModelConfig(all_dict)
 
+        # XPU currently disable prefix cache for VL model
+        if current_platform.is_xpu() and (self.enable_mm or model_cfg.enable_mm):
+            self.enable_prefix_caching = False
+
         if not model_cfg.is_unified_ckpt and hasattr(model_cfg, "tensor_parallel_size"):
             self.tensor_parallel_size = model_cfg.tensor_parallel_size