|
47 | 47 | from fastdeploy.plugins.token_processor import load_token_processor_plugins |
48 | 48 | from fastdeploy.splitwise.internal_adapter_utils import InternalAdapter |
49 | 49 | from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector |
50 | | -from fastdeploy.utils import EngineError, envs, get_logger, llm_logger |
| 50 | +from fastdeploy.utils import EngineError, envs, get_logger, llm_logger, trace_logger |
51 | 51 |
|
52 | 52 | try: |
53 | 53 | TokenProcessor = load_token_processor_plugins() |
@@ -384,7 +384,17 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False): |
384 | 384 |
|
385 | 385 | for item in tasks: |
386 | 386 | item.schedule_start_time = time.time() |
387 | | - |
| 387 | + trace_logger.info( |
| 388 | + "resource allocate start", |
| 389 | + extra={ |
| 390 | + "attributes": { |
| 391 | + "request_id": f"{item.request_id}", |
| 392 | + "user_id": f"{getattr(item, 'user', '')}", |
| 393 | + "event": "RESOURCE_ALLOCATE_START", |
| 394 | + "stage": "SCHEDULE", |
| 395 | + } |
| 396 | + }, |
| 397 | + ) |
388 | 398 | available_batch = np.sum(self.resource_manager.stop_flags) |
389 | 399 | if len(tasks) > available_batch: |
390 | 400 | self.llm_logger.error(f"Inserting batch:{len(tasks)} exceeds the available batch:{available_batch}.") |
@@ -418,6 +428,39 @@ def insert_tasks(self, tasks, current_id=-1, allocated=False): |
418 | 428 | self.llm_logger.info(f"Tasks are sent to engine, req_ids={req_ids}") |
419 | 429 | for task in tasks: |
420 | 430 | task.inference_start_time = time.time() |
| 431 | + trace_logger.info( |
| 432 | + "resource allocate end", |
| 433 | + extra={ |
| 434 | + "attributes": { |
| 435 | + "request_id": f"{task.request_id}", |
| 436 | + "user_id": f"{getattr(task, 'user', '')}", |
| 437 | + "event": "RESOURCE_ALLOCATE_END", |
| 438 | + "stage": "SCHEDULE", |
| 439 | + } |
| 440 | + }, |
| 441 | + ) |
| 442 | + trace_logger.info( |
| 443 | + "request schedule end", |
| 444 | + extra={ |
| 445 | + "attributes": { |
| 446 | + "request_id": f"{task.request_id}", |
| 447 | + "user_id": f"{getattr(task, 'user', '')}", |
| 448 | + "event": "REQUEST_SCHEDULE_END", |
| 449 | + "stage": "SCHEDULE", |
| 450 | + } |
| 451 | + }, |
| 452 | + ) |
| 453 | + trace_logger.info( |
| 454 | + "request inference start", |
| 455 | + extra={ |
| 456 | + "attributes": { |
| 457 | + "request_id": f"{task.request_id}", |
| 458 | + "user_id": f"{getattr(task, 'user', '')}", |
| 459 | + "event": "INFERENCE_START", |
| 460 | + "stage": "PREFILL", |
| 461 | + } |
| 462 | + }, |
| 463 | + ) |
421 | 464 | if not is_prefill: |
422 | 465 | if not self.cfg.model_config.enable_mm: |
423 | 466 | self.update_requests_chunk_size(tasks) |
@@ -611,7 +654,18 @@ def _insert_task_to_worker(self): |
611 | 654 | max_num_batched_tokens=self.cfg.scheduler_config.max_num_batched_tokens, |
612 | 655 | batch=num_prefill_batch, |
613 | 656 | ) |
614 | | - |
| 657 | + for task in tasks: |
| 658 | + trace_logger.info( |
| 659 | + "request queue end", |
| 660 | + extra={ |
| 661 | + "attributes": { |
| 662 | + "request_id": f"{task.request_id}", |
| 663 | + "user_id": f"{getattr(task, 'user', '')}", |
| 664 | + "event": "REQUEST_QUEUE_END", |
| 665 | + "stage": "SCHEDULE", |
| 666 | + } |
| 667 | + }, |
| 668 | + ) |
615 | 669 | if len(tasks) == 0: |
616 | 670 | time.sleep(0.001) |
617 | 671 | continue |
@@ -702,6 +756,18 @@ def _fetch_request(): |
702 | 756 | time.sleep(0.001) |
703 | 757 | # Fetch requests and add them to the scheduling queue |
704 | 758 | if tasks: |
| 759 | + for task in tasks: |
| 760 | + trace_logger.info( |
| 761 | + "resource allocate start", |
| 762 | + extra={ |
| 763 | + "attributes": { |
| 764 | + "request_id": f"{task.request_id}", |
| 765 | + "user_id": f"{getattr(task, 'user', '')}", |
| 766 | + "event": "RESOURCE_ALLOCATE_START", |
| 767 | + "stage": "SCHEDULE", |
| 768 | + } |
| 769 | + }, |
| 770 | + ) |
705 | 771 | if self.cfg.scheduler_config.splitwise_role == "prefill": |
706 | 772 | self.resource_manager.add_request_in_p(tasks) |
707 | 773 | else: |
@@ -756,6 +822,39 @@ def _fetch_request(): |
756 | 822 | ] |
757 | 823 | ) |
758 | 824 | self.resource_manager.get_real_bsz() |
| 825 | + for task in tasks: |
| 826 | + trace_logger.info( |
| 827 | + "resource allocate end", |
| 828 | + extra={ |
| 829 | + "attributes": { |
| 830 | + "request_id": f"{task.request_id}", |
| 831 | + "user_id": f"{getattr(task, 'user', '')}", |
| 832 | + "event": "RESOURCE_ALLOCATE_END", |
| 833 | + "stage": "SCHEDULE", |
| 834 | + } |
| 835 | + }, |
| 836 | + ) |
| 837 | + trace_logger.info( |
| 838 | + "request schedule end", |
| 839 | + extra={ |
| 840 | + "attributes": { |
| 841 | + "request_id": f"{task.request_id}", |
| 842 | + "user_id": f"{getattr(task, 'user', '')}", |
| 843 | + "event": "REQUEST_SCHEDULE_END", |
| 844 | + "stage": "SCHEDULE", |
| 845 | + } |
| 846 | + }, |
| 847 | + ) |
| 848 | + trace_logger.info( |
| 849 | + "request inference start", |
| 850 | + extra={ |
| 851 | + "attributes": { |
| 852 | + "request_id": f"{task.request_id}", |
| 853 | + "user_id": f"{getattr(task, 'user', '')}", |
| 854 | + "event": "INFERENCE_START", |
| 855 | + } |
| 856 | + }, |
| 857 | + ) |
759 | 858 | self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz)) |
760 | 859 | else: |
761 | 860 | time.sleep(0.005) |
@@ -813,6 +912,39 @@ def _insert_zmq_task_to_scheduler(self): |
813 | 912 | start_span("ENQUEUE_ZMQ", data, trace.SpanKind.PRODUCER) |
814 | 913 | main_process_metrics.requests_number.inc() |
815 | 914 | self.llm_logger.debug(f"Receive request: {request}") |
| 915 | + trace_logger.info( |
| 916 | + "preprocess end", |
| 917 | + extra={ |
| 918 | + "attributes": { |
| 919 | + "request_id": f"{data['request_id']}", |
| 920 | + "user_id": f"{data.get('user', '')}", |
| 921 | + "event": "PREPROCESSING_END", |
| 922 | + "stage": "PREPROCESSING", |
| 923 | + } |
| 924 | + }, |
| 925 | + ) |
| 926 | + trace_logger.info( |
| 927 | + "request schedule start", |
| 928 | + extra={ |
| 929 | + "attributes": { |
| 930 | + "request_id": f"{data['request_id']}", |
| 931 | + "user_id": f"{data.get('user', '')}", |
| 932 | + "event": "REQUEST_SCHEDULE_START", |
| 933 | + "stage": "SCHEDULE", |
| 934 | + } |
| 935 | + }, |
| 936 | + ) |
| 937 | + trace_logger.info( |
| 938 | + "request queue start", |
| 939 | + extra={ |
| 940 | + "attributes": { |
| 941 | + "request_id": f"{data['request_id']}", |
| 942 | + "user_id": f"{data.get('user', '')}", |
| 943 | + "event": "REQUEST_QUEUE_START", |
| 944 | + "stage": "SCHEDULE", |
| 945 | + } |
| 946 | + }, |
| 947 | + ) |
816 | 948 | except Exception as e: |
817 | 949 | self.llm_logger.error(f"Receive request error: {e}, {traceback.format_exc()!s}") |
818 | 950 | err_msg = str(e) |
|
0 commit comments