Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions enterprise/litellm_enterprise/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
from litellm.types.integrations.prometheus import *
from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
from litellm.types.utils import StandardLoggingPayload
from litellm.utils import get_end_user_id_for_cost_tracking

Expand Down Expand Up @@ -794,9 +795,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
output_tokens = standard_logging_payload["completion_tokens"]
tokens_used = standard_logging_payload["total_tokens"]
response_cost = standard_logging_payload["response_cost"]
_requester_metadata = standard_logging_payload["metadata"].get(
_requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
"requester_metadata"
)
user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
"metadata"
].get("user_api_key_auth_metadata")
combined_metadata: Dict[str, Any] = {
**(_requester_metadata if _requester_metadata else {}),
**(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
}
if standard_logging_payload is not None and isinstance(
standard_logging_payload, dict
):
Expand Down Expand Up @@ -828,8 +836,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
exception_status=None,
exception_class=None,
custom_metadata_labels=get_custom_labels_from_metadata(
metadata=standard_logging_payload["metadata"].get("requester_metadata")
or {}
metadata=combined_metadata
),
route=standard_logging_payload["metadata"].get(
"user_api_key_request_route"
Expand Down Expand Up @@ -1649,9 +1656,22 @@ def set_litellm_deployment_state(
api_base: Optional[str],
api_provider: str,
):
self.litellm_deployment_state.labels(
litellm_model_name, model_id, api_base, api_provider
).set(state)
"""
Set the deployment state.
"""
### get labels
_labels = prometheus_label_factory(
supported_enum_labels=self.get_labels_for_metric(
metric_name="litellm_deployment_state"
),
enum_values=UserAPIKeyLabelValues(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=api_provider,
),
)
self.litellm_deployment_state.labels(**_labels).set(state)

def set_deployment_healthy(
self,
Expand Down Expand Up @@ -2228,8 +2248,10 @@ def prometheus_label_factory(

if enum_values.custom_metadata_labels is not None:
for key, value in enum_values.custom_metadata_labels.items():
if key in supported_enum_labels:
filtered_labels[key] = value
# check sanitized key
sanitized_key = _sanitize_prometheus_label_name(key)
if sanitized_key in supported_enum_labels:
filtered_labels[sanitized_key] = value

# Add custom tags if configured
if enum_values.tags is not None:
Expand Down
2 changes: 2 additions & 0 deletions litellm/litellm_core_utils/litellm_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -4040,6 +4040,7 @@ def get_standard_logging_metadata(
usage_object=usage_object,
requester_custom_headers=None,
cold_storage_object_key=None,
user_api_key_auth_metadata=None,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
Expand Down Expand Up @@ -4755,6 +4756,7 @@ def get_standard_logging_metadata(
requester_custom_headers=None,
user_api_key_request_route=None,
cold_storage_object_key=None,
user_api_key_auth_metadata=None,
)
if isinstance(metadata, dict):
# Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
Expand Down
33 changes: 6 additions & 27 deletions litellm/proxy/_new_secret_config.yaml
Original file line number Diff line number Diff line change
@@ -1,30 +1,9 @@
model_list:
- model_name: byok-fixed-gpt-4o-mini
- model_name: openai/gpt-4o
litellm_params:
model: openai/gpt-4o-mini
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
- model_name: "byok-wildcard/*"
litellm_params:
model: openai/*
- model_name: xai-grok-3
litellm_params:
model: xai/grok-3
- model_name: hosted_vllm/whisper-v3
litellm_params:
model: hosted_vllm/whisper-v3
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy

mcp_servers:
github_mcp:
url: "https://api.githubcopilot.com/mcp"
auth_type: oauth2
authorization_url: https://github.com/login/oauth/authorize
token_url: https://github.com/login/oauth/access_token
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
scopes: ["public_repo", "user:email"]
allowed_tools: ["list_tools"]
# disallowed_tools: ["repo_delete"]
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY

litellm_settings:
callbacks: ["prometheus"]
custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
1 change: 1 addition & 0 deletions litellm/proxy/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3068,6 +3068,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
"tags",
"team_member_key_duration",
"prompts",
"logging",
]


Expand Down
54 changes: 53 additions & 1 deletion litellm/proxy/litellm_pre_call_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
user_api_key_end_user_id=user_api_key_dict.end_user_id,
user_api_key_user_email=user_api_key_dict.user_email,
user_api_key_request_route=user_api_key_dict.request_route,
user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
user_api_key_budget_reset_at=(
user_api_key_dict.budget_reset_at.isoformat()
if user_api_key_dict.budget_reset_at
else None
),
user_api_key_auth_metadata=None,
)
return user_api_key_logged_metadata

Expand Down Expand Up @@ -607,6 +612,37 @@ def add_user_api_key_auth_to_request_metadata(
)
return data

@staticmethod
def add_management_endpoint_metadata_to_request_metadata(
    data: dict,
    management_endpoint_metadata: dict,
    _metadata_variable_name: str,
) -> dict:
    """
    Merge management-endpoint (key/team) metadata into the request metadata.

    Copies every entry of ``management_endpoint_metadata`` into
    ``data[_metadata_variable_name]["user_api_key_auth_metadata"]``,
    skipping reserved management-only fields (e.g. ``logging``) so that
    sensitive management settings are never forwarded with the request.

    Args:
        data: the request payload; mutated in place and also returned.
        management_endpoint_metadata: metadata stored on the key/team row.
        _metadata_variable_name: name of the metadata dict inside ``data``
            (e.g. ``"metadata"`` or ``"litellm_metadata"``).

    Returns:
        The same ``data`` dict with the filtered metadata merged in.
    """
    from litellm.proxy._types import (
        LiteLLM_ManagementEndpoint_MetadataFields,
        LiteLLM_ManagementEndpoint_MetadataFields_Premium,
    )

    # Build the skip-set once: the original re-concatenated both lists on
    # every iteration, making each membership test O(n).
    reserved_fields = set(
        LiteLLM_ManagementEndpoint_MetadataFields_Premium
        + LiteLLM_ManagementEndpoint_MetadataFields
    )
    added_metadata = {
        k: v
        for k, v in management_endpoint_metadata.items()
        if k not in reserved_fields
    }

    # Guard against callers that have not initialized the metadata dict yet;
    # the original indexed data[_metadata_variable_name] directly and would
    # raise KeyError in that case.
    request_metadata = data.setdefault(_metadata_variable_name, {})
    if request_metadata.get("user_api_key_auth_metadata") is None:
        request_metadata["user_api_key_auth_metadata"] = {}
    request_metadata["user_api_key_auth_metadata"].update(added_metadata)
    return data

@staticmethod
def add_key_level_controls(
key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
Expand Down Expand Up @@ -651,6 +687,13 @@ def add_key_level_controls(
key_metadata["disable_fallbacks"], bool
):
data["disable_fallbacks"] = key_metadata["disable_fallbacks"]

## KEY-LEVEL METADATA
data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
data=data,
management_endpoint_metadata=key_metadata,
_metadata_variable_name=_metadata_variable_name,
)
return data

@staticmethod
Expand Down Expand Up @@ -889,6 +932,15 @@ async def add_litellm_data_to_request( # noqa: PLR0915
"spend_logs_metadata"
]

## TEAM-LEVEL METADATA
data = (
LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
data=data,
management_endpoint_metadata=team_metadata,
_metadata_variable_name=_metadata_variable_name,
)
)

# Team spend, budget - used by prometheus.py
data[_metadata_variable_name][
"user_api_key_team_max_budget"
Expand Down
58 changes: 31 additions & 27 deletions litellm/proxy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,9 +1395,12 @@ async def post_call_success_hook(
3. /image/generation
4. /files
"""
from litellm.types.guardrails import GuardrailEventHooks

for callback in litellm.callbacks:
try:
guardrail_callbacks: List[CustomGuardrail] = []
other_callbacks: List[CustomLogger] = []
try:
for callback in litellm.callbacks:
_callback: Optional[CustomLogger] = None
if isinstance(callback, str):
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
Expand All @@ -1407,36 +1410,37 @@ async def post_call_success_hook(
_callback = callback # type: ignore

if _callback is not None:
if isinstance(_callback, CustomGuardrail):
guardrail_callbacks.append(_callback)
else:
other_callbacks.append(_callback)
############## Handle Guardrails ########################################
#############################################################################
if isinstance(callback, CustomGuardrail):
# Main - V2 Guardrails implementation
from litellm.types.guardrails import GuardrailEventHooks

if (
callback.should_run_guardrail(
data=data, event_type=GuardrailEventHooks.post_call
)
is not True
):
continue
for callback in guardrail_callbacks:
# Main - V2 Guardrails implementation
if (
callback.should_run_guardrail(
data=data, event_type=GuardrailEventHooks.post_call
)
is not True
):
continue

await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)
await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)

############ Handle CustomLogger ###############################
#################################################################
elif isinstance(_callback, CustomLogger):
await _callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)
except Exception as e:
raise e
############ Handle CustomLogger ###############################
#################################################################
for callback in other_callbacks:
await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict, data=data, response=response
)
except Exception as e:
raise e
return response

async def async_post_call_streaming_hook(
Expand Down
8 changes: 4 additions & 4 deletions litellm/types/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,13 +426,13 @@ class PrometheusMetricLabels:

# Buffer monitoring metrics - these typically don't need additional labels
litellm_pod_lock_manager_size: List[str] = []

litellm_in_memory_daily_spend_update_queue_size: List[str] = []

litellm_redis_daily_spend_update_queue_size: List[str] = []

litellm_in_memory_spend_update_queue_size: List[str] = []

litellm_redis_spend_update_queue_size: List[str] = []

@staticmethod
Expand Down
9 changes: 8 additions & 1 deletion litellm/types/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
user_api_key_team_alias: Optional[str]
user_api_key_end_user_id: Optional[str]
user_api_key_request_route: Optional[str]
user_api_key_auth_metadata: Optional[Dict[str, str]]


class StandardLoggingMCPToolCall(TypedDict, total=False):
Expand Down Expand Up @@ -2077,10 +2078,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):

StandardLoggingPayloadStatus = Literal["success", "failure"]


class CachingDetails(TypedDict):
"""
Track all caching related metrics, fields for a given request
"""

cache_hit: Optional[bool]
"""
Whether the request hit the cache
Expand All @@ -2090,12 +2093,16 @@ class CachingDetails(TypedDict):
Duration for reading from cache
"""


class CostBreakdown(TypedDict):
"""
Detailed cost breakdown for a request
"""

input_cost: float # Cost of input/prompt tokens
output_cost: float # Cost of output/completion tokens (includes reasoning if applicable)
output_cost: (
float # Cost of output/completion tokens (includes reasoning if applicable)
)
total_cost: float # Total cost (input + output + tool usage)
tool_usage_cost: float # Cost of usage of built-in tools

Expand Down
Loading