diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 72ca1f7c820..cf538c0177a 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -981,6 +981,8 @@ def _fetch_request(): LoggingEventName.ASK_DECODE_RESOURCE_START, task.request_id, getattr(task, "user", "") ) task.metrics.ask_decode_resource_start_time = time.time() + if envs.FD_PD_LOG_REQUEST: + self.llm_logger.info(f"[PD_LOG] P sends Request: {task.to_dict()}") while True: self.split_connector.send_splitwise_tasks([task], task.idx) status, msg = self.split_connector.check_decode_allocated(task) @@ -1011,6 +1013,8 @@ def _fetch_request(): LoggingEventName.ASK_DECODE_RESOURCE_START, task.request_id, getattr(task, "user", "") ) task.metrics.ask_decode_resource_start_time = time.time() + if envs.FD_PD_LOG_REQUEST: + self.llm_logger.info(f"[PD_LOG] P sends Request: {task.to_dict()}") self.split_connector.send_splitwise_tasks([task], task.idx) for task in tasks: @@ -2095,6 +2099,8 @@ def _fetch_requests(): f"D has received tasks to preallocate resource for tasks: {[task.request_id for task in tasks]}" ) for task in tasks: + if envs.FD_PD_LOG_REQUEST: + self.llm_logger.info(f"[PD_LOG] D received Request: {task.to_dict()}") task.metrics.decode_recv_req_time = time.time() allocate_resource_requests.extend(tasks) elif isinstance(tasks[0], RequestOutput): diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py index 83859610dae..2067d84b006 100644 --- a/fastdeploy/envs.py +++ b/fastdeploy/envs.py @@ -269,6 +269,8 @@ def _validate_split_kv_size(value: int) -> int: "FD_DETERMINISTIC_LOG_MODE": lambda: bool(int(os.getenv("FD_DETERMINISTIC_LOG_MODE", "0"))), # Whether to use PD REORDER, can set 0 or 1 "FD_PD_REORDER": lambda: int(os.getenv("FD_PD_REORDER", "0")), + # Whether to log full PD disaggregation requests on both the prefill (send) and decode (receive) sides, can set 0 or 1 + "FD_PD_LOG_REQUEST": lambda: int(os.getenv("FD_PD_LOG_REQUEST", "0")), # Whether to probe MoE routing probabilities and use 
Fleet's fused SwiGLU kernel. "FD_MOE_PROB_IN_ADVANCE": lambda: bool(int(os.getenv("FD_MOE_PROB_IN_ADVANCE", "0"))), # Whether to use batch send data in zmq