fastdeploy/worker/worker_process.py (15 changes: 3 additions & 12 deletions)
@@ -51,12 +51,7 @@
 from fastdeploy.eplb.experts_manager import RedundantExpertManager
 from fastdeploy.eplb.utils import RearrangeExpertState
 from fastdeploy.inter_communicator import EngineWorkerQueue as TaskQueue
-from fastdeploy.inter_communicator import (
-    ExistTaskStatus,
-    IPCSignal,
-    ModelWeightsStatus,
-    shared_memory_exists,
-)
+from fastdeploy.inter_communicator import ExistTaskStatus, IPCSignal, ModelWeightsStatus
 from fastdeploy.model_executor.layers.quantization import parse_quant_config
 from fastdeploy.model_executor.utils import v1_loader_support
 from fastdeploy.platforms import current_platform
@@ -544,16 +539,12 @@ def initialize_kv_cache(self) -> None:
     def graph_optimize_and_warm_up_model(self) -> None:
         self.worker.graph_optimize_and_warm_up_model()
         # reset cache_messager prefilled_step signal
-        if self.scheduler_config.splitwise_role == "prefill":
+        if not envs.ENABLE_V1_KVCACHE_SCHEDULER and self.scheduler_config.splitwise_role == "prefill":
             gpu_id = self.worker.model_runner.device_id
             prefilled_step_name = f"splitwise_complete_prefilled_step_{self.local_rank}"
             prefilled_step_idx_data = np.zeros(shape=[1], dtype=np.int32)
             step_shm_value = IPCSignal(
-                name=prefilled_step_name,
-                array=prefilled_step_idx_data,
-                dtype=np.int32,
-                suffix=gpu_id,
-                create=not shared_memory_exists(prefilled_step_name),
+                name=prefilled_step_name, array=prefilled_step_idx_data, dtype=np.int32, suffix=gpu_id, create=False
             )
             step_shm_value.value[0] = -1
 
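A note on what the second hunk changes: with create=False, the worker no longer probes for the shared-memory segment itself (which is why shared_memory_exists is dropped from the imports in the first hunk); it assumes the splitwise_complete_prefilled_step_* signal has already been created elsewhere (e.g. by the cache messager, per the existing comment) and only attaches to it to reset the counter to -1. The added "not envs.ENABLE_V1_KVCACHE_SCHEDULER" guard additionally skips this reset when the v1 KV-cache scheduler is enabled. Below is a minimal sketch of that create-vs-attach pattern using the standard library's multiprocessing.shared_memory rather than FastDeploy's IPCSignal (whose internals are not part of this diff); the segment name and the creator/attacher split here are assumptions for illustration only.

import numpy as np
from multiprocessing import shared_memory

# Hypothetical segment name, loosely mirroring
# f"splitwise_complete_prefilled_step_{local_rank}" plus a GPU-id suffix;
# the real naming is handled inside IPCSignal.
SIGNAL_NAME = "splitwise_complete_prefilled_step_0_0"


def create_signal():
    # Creator side (the process that owns the signal, e.g. the cache messager):
    # allocates a one-element int32 segment exactly once.
    shm = shared_memory.SharedMemory(
        name=SIGNAL_NAME, create=True, size=np.dtype(np.int32).itemsize
    )
    value = np.ndarray((1,), dtype=np.int32, buffer=shm.buf)
    value[0] = 0
    return shm, value


def attach_signal():
    # Worker side after this change (the create=False path): attach only.
    # Raises FileNotFoundError if the creator has not set the segment up yet,
    # instead of silently creating a second, disconnected copy.
    shm = shared_memory.SharedMemory(name=SIGNAL_NAME, create=False)
    value = np.ndarray((1,), dtype=np.int32, buffer=shm.buf)
    value[0] = -1  # reset the prefilled-step counter, as the worker does
    return shm, value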