diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 32f2956a1167..37e42304f5e5 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -71,6 +71,7 @@
 /python/ray/data/llm.py @ray-project/ray-llm
 /python/ray/dashboard/modules/metrics/dashboards/serve_llm_dashboard_panels.py @ray-project/ray-llm
 /python/ray/dashboard/modules/metrics/dashboards/serve_llm_grafana_dashboard_base.json @ray-project/ray-llm
+/doc/source/serve/llm/ @ray-project/ray-llm
 
 # Ray Serve
 /python/ray/serve/ @ray-project/ray-serve
diff --git a/doc/source/serve/llm/quick-start.rst b/doc/source/serve/llm/quick-start.rst
index f47b3465d4c6..0c416e36d090 100644
--- a/doc/source/serve/llm/quick-start.rst
+++ b/doc/source/serve/llm/quick-start.rst
@@ -298,7 +298,7 @@ Engine Metrics
 ---------------------
 All engine metrics, including vLLM, are available through the Ray metrics export endpoint and are queryable using Prometheus. See `vLLM metrics `_ for a complete list. These are also visualized by the Serve LLM Grafana dashboard. Dashboard panels include: time per output token (TPOT), time to first token (TTFT), and GPU cache utilization.
 
-Engine metric logging is off by default, and must be manually enabled. In addition, you must enable the vLLM V1 engine to use engine metrics. To enable engine-level metric logging, set `log_engine_metrics: True` when configuring the LLM deployment. For example:
+Engine metric logging is on by default as of Ray 2.51. To disable engine-level metric logging, set `log_engine_metrics: False` when configuring the LLM deployment. For example:
 
 .. tab-set::
 
@@ -320,7 +320,7 @@ Engine metric logging is off by default, and must be manually enabled. In additi
                         min_replicas=1, max_replicas=2,
                     )
                 ),
-                log_engine_metrics=True
+                log_engine_metrics=False
             )
 
             app = build_openai_app({"llm_configs": [llm_config]})
@@ -343,7 +343,7 @@ Engine metric logging is off by default, and must be manually enabled. In additi
                         autoscaling_config:
                           min_replicas: 1
                           max_replicas: 2
-                      log_engine_metrics: true
+                      log_engine_metrics: false
               import_path: ray.serve.llm:build_openai_app
               name: llm_app
               route_prefix: "/"
diff --git a/python/ray/llm/_internal/serve/configs/server_models.py b/python/ray/llm/_internal/serve/configs/server_models.py
index e0587405f67a..03665efa3c7e 100644
--- a/python/ray/llm/_internal/serve/configs/server_models.py
+++ b/python/ray/llm/_internal/serve/configs/server_models.py
@@ -208,7 +208,7 @@ class LLMConfig(BaseModelExtended):
 
     log_engine_metrics: Optional[bool] = Field(
         default=True,
-        description="Enable additional engine metrics via Ray Prometheus port. Default is True.",
+        description="Enable additional engine metrics via Ray Prometheus port.",
     )
 
     _supports_vision: bool = PrivateAttr(False)
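
A minimal sketch of the behavior this diff documents: because `LLMConfig.log_engine_metrics` now has `default=True`, engine metrics are exported without any explicit setting, and passing `log_engine_metrics=False` opts out. The model id and source below are illustrative placeholders, not part of this PR.

```python
from ray.serve.llm import LLMConfig

# Default behavior after this change: engine metrics are on without
# setting log_engine_metrics explicitly. Model id/source are placeholders.
config = LLMConfig(
    model_loading_config=dict(
        model_id="qwen-0.5b",
        model_source="Qwen/Qwen2.5-0.5B-Instruct",
    ),
)
print(config.log_engine_metrics)  # True -- the new default

# Opting out, matching the updated quick-start example.
disabled = LLMConfig(
    model_loading_config=dict(
        model_id="qwen-0.5b",
        model_source="Qwen/Qwen2.5-0.5B-Instruct",
    ),
    log_engine_metrics=False,
)
print(disabled.log_engine_metrics)  # False
```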