From 60063202517cc9748cfb2530e93313133a3e5753 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 12:50:21 -0700
Subject: [PATCH 1/4] fix(proxy/utils.py): run guardrails before running other
 logging hooks on "async_post_call_success_hook"

Closes LIT-1152
---
 litellm/proxy/utils.py | 58 ++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 5b11c25b2bfe..d3d2972abfac 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1395,9 +1395,12 @@ async def post_call_success_hook(
         3. /image/generation
         4. /files
         """
+        from litellm.types.guardrails import GuardrailEventHooks
 
-        for callback in litellm.callbacks:
-            try:
+        guardrail_callbacks: List[CustomGuardrail] = []
+        other_callbacks: List[CustomLogger] = []
+        try:
+            for callback in litellm.callbacks:
                 _callback: Optional[CustomLogger] = None
                 if isinstance(callback, str):
                     _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
@@ -1407,36 +1410,37 @@ async def post_call_success_hook(
                     _callback = callback  # type: ignore
 
                 if _callback is not None:
+                    if isinstance(_callback, CustomGuardrail):
+                        guardrail_callbacks.append(_callback)
+                    else:
+                        other_callbacks.append(_callback)
 
-                ############## Handle Guardrails ########################################
-                #############################################################################
-                if isinstance(callback, CustomGuardrail):
-                    # Main - V2 Guardrails implementation
-                    from litellm.types.guardrails import GuardrailEventHooks
-
-                    if (
-                        callback.should_run_guardrail(
-                            data=data, event_type=GuardrailEventHooks.post_call
-                        )
-                        is not True
-                    ):
-                        continue
+            ############## Handle Guardrails ########################################
+            #############################################################################
+            for callback in guardrail_callbacks:
+                # Main - V2 Guardrails implementation
+                if (
+                    callback.should_run_guardrail(
+                        data=data, event_type=GuardrailEventHooks.post_call
+                    )
+                    is not True
+                ):
+                    continue
 
-                    await callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict,
+                    data=data,
+                    response=response,
+                )
 
-                ############ Handle CustomLogger ###############################
-                #################################################################
-                elif isinstance(_callback, CustomLogger):
-                    await _callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
-            except Exception as e:
-                raise e
+            ############ Handle CustomLogger ###############################
+            #################################################################
+            for callback in other_callbacks:
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict, data=data, response=response
+                )
+        except Exception as e:
+            raise e
         return response
 
     async def async_post_call_streaming_hook(
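A minimal standalone sketch of the ordering this patch establishes: guardrails are partitioned out first and run to completion before any plain logging callback sees the response, so a guardrail rejection prevents the request from ever being logged as a success. Class and method names here are simplified stand-ins, not litellm's actual `CustomGuardrail`/`CustomLogger` API.

```python
import asyncio
from typing import List


class Logger:
    async def on_success(self, data: dict, response: dict) -> None:
        print(f"logged: {response['text']}")


class Guardrail(Logger):
    def should_run(self, data: dict) -> bool:
        return data.get("mode") == "strict"

    async def on_success(self, data: dict, response: dict) -> None:
        if "blocked" in response["text"]:
            raise ValueError("guardrail rejected response")


async def post_call_success(callbacks: List[Logger], data: dict, response: dict) -> dict:
    guardrails = [c for c in callbacks if isinstance(c, Guardrail)]
    others = [c for c in callbacks if not isinstance(c, Guardrail)]

    # Phase 1: guardrails may veto. If one raises, the loggers below
    # never run -- this is the reordering the patch introduces.
    for g in guardrails:
        if not g.should_run(data):
            continue
        await g.on_success(data, response)

    # Phase 2: ordinary logging callbacks observe the approved response.
    for o in others:
        await o.on_success(data, response)
    return response


asyncio.run(post_call_success([Logger(), Guardrail()], {"mode": "strict"}, {"text": "ok"}))
```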
From 6ca7752381fe8ac6aae08985879d59efdedae6fc Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 15:46:30 -0700
Subject: [PATCH 2/4] fix(prometheus.py): don't require metadata labels to be
 set for all requests

add a default value if metadata label not set
---
 .../integrations/prometheus.py | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index d3b0aefb86f6..a42f9b642d9a 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -1649,9 +1649,22 @@ def set_litellm_deployment_state(
         api_base: Optional[str],
         api_provider: str,
     ):
-        self.litellm_deployment_state.labels(
-            litellm_model_name, model_id, api_base, api_provider
-        ).set(state)
+        """
+        Set the deployment state.
+        """
+        ### get labels
+        _labels = prometheus_label_factory(
+            supported_enum_labels=self.get_labels_for_metric(
+                metric_name="litellm_deployment_state"
+            ),
+            enum_values=UserAPIKeyLabelValues(
+                litellm_model_name=litellm_model_name,
+                model_id=model_id,
+                api_base=api_base,
+                api_provider=api_provider,
+            ),
+        )
+        self.litellm_deployment_state.labels(**_labels).set(state)
 
     def set_deployment_healthy(
         self,
@@ -2230,6 +2243,10 @@ def prometheus_label_factory(
         for key, value in enum_values.custom_metadata_labels.items():
             if key in supported_enum_labels:
                 filtered_labels[key] = value
+            else:
+                filtered_labels[key] = (
+                    "None"  # this happens for dynamically added metadata labels
+                )
 
     # Add custom tags if configured
    if enum_values.tags is not None:
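Context for the change above: prometheus_client requires every label a metric was declared with to be supplied on every observation, so a request that doesn't carry a given metadata key still has to report something for that label. A minimal sketch of the "default when unset" behaviour, with illustrative label names rather than litellm's actual metric schema:

```python
from typing import Dict, List


def build_labels(declared: List[str], metadata: Dict[str, str]) -> Dict[str, str]:
    # prometheus_client raises ValueError if a declared label is missing,
    # so default every unset key to the string "None" instead of omitting it.
    return {name: metadata.get(name, "None") for name in declared}


declared = ["metadata_initiative", "metadata_business_unit"]
print(build_labels(declared, {"metadata_initiative": "q4-launch"}))
# -> {'metadata_initiative': 'q4-launch', 'metadata_business_unit': 'None'}
```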
From d6800ee706194aeaff40bbacc653b74033a33586 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 17:02:40 -0700
Subject: [PATCH 3/4] feat(prometheus.py): initial working commit of passing
 team/key metadata as prometheus metrics

Closes LIT-1006
---
 .../integrations/prometheus.py                | 12 +++--
 litellm/litellm_core_utils/litellm_logging.py |  2 +
 litellm/proxy/_new_secret_config.yaml         | 33 +++---------
 litellm/proxy/_types.py                       |  1 +
 litellm/proxy/litellm_pre_call_utils.py       | 52 ++++++++++++++++++-
 litellm/types/utils.py                        | 31 ++++++++---
 6 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index a42f9b642d9a..b472ccbb357b 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -794,9 +794,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
         output_tokens = standard_logging_payload["completion_tokens"]
         tokens_used = standard_logging_payload["total_tokens"]
         response_cost = standard_logging_payload["response_cost"]
-        _requester_metadata = standard_logging_payload["metadata"].get(
+        _requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
             "requester_metadata"
         )
+        user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
+            "metadata"
+        ].get("user_api_key_auth_metadata")
+        combined_metadata: Dict[str, Any] = {
+            **(_requester_metadata if _requester_metadata else {}),
+            **(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
+        }
         if standard_logging_payload is not None and isinstance(
             standard_logging_payload, dict
         ):
@@ -828,8 +835,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
             exception_status=None,
             exception_class=None,
             custom_metadata_labels=get_custom_labels_from_metadata(
-                metadata=standard_logging_payload["metadata"].get("requester_metadata")
-                or {}
+                metadata=combined_metadata
             ),
             route=standard_logging_payload["metadata"].get(
                 "user_api_key_request_route"
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 24449e1bd0f7..46e363c865da 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -4019,6 +4019,7 @@ def get_standard_logging_metadata(
         usage_object=usage_object,
         requester_custom_headers=None,
         cold_storage_object_key=None,
+        user_api_key_auth_metadata=None,
     )
     if isinstance(metadata, dict):
         # Filter the metadata dictionary to include only the specified keys
@@ -4685,6 +4686,7 @@ def get_standard_logging_metadata(
         requester_custom_headers=None,
         user_api_key_request_route=None,
         cold_storage_object_key=None,
+        user_api_key_auth_metadata=None,
     )
     if isinstance(metadata, dict):
         # Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 804cf2cf2cfc..96d9ea0cc316 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,30 +1,9 @@
 model_list:
-  - model_name: byok-fixed-gpt-4o-mini
+  - model_name: openai/gpt-4o
     litellm_params:
-      model: openai/gpt-4o-mini
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-  - model_name: "byok-wildcard/*"
-    litellm_params:
-      model: openai/*
-  - model_name: xai-grok-3
-    litellm_params:
-      model: xai/grok-3
-  - model_name: hosted_vllm/whisper-v3
-    litellm_params:
-      model: hosted_vllm/whisper-v3
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-
-mcp_servers:
-  github_mcp:
-    url: "https://api.githubcopilot.com/mcp"
-    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
-    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
-    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
-    allowed_tools: ["list_tools"]
-    # disallowed_tools: ["repo_delete"]
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+
+litellm_settings:
+  callbacks: ["prometheus"]
+  custom_prometheus_metadata_labels: ["metadata.initiative"]
\ No newline at end of file
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index c5370eb7d70d..00ffae718e16 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -3066,6 +3066,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
     "tags",
     "team_member_key_duration",
     "prompts",
+    "logging",
 ]
 
 
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index e077d0ee923f..73052d149577 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
         user_api_key_end_user_id=user_api_key_dict.end_user_id,
         user_api_key_user_email=user_api_key_dict.user_email,
         user_api_key_request_route=user_api_key_dict.request_route,
-        user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
+        user_api_key_budget_reset_at=(
+            user_api_key_dict.budget_reset_at.isoformat()
+            if user_api_key_dict.budget_reset_at
+            else None
+        ),
+        user_api_key_auth_metadata=None,
     )
     return user_api_key_logged_metadata
 
@@ -607,6 +612,35 @@ def add_user_api_key_auth_to_request_metadata(
         )
         return data
 
+    @staticmethod
+    def add_management_endpoint_metadata_to_request_metadata(
+        data: dict,
+        management_endpoint_metadata: dict,
+        _metadata_variable_name: str,
+    ) -> dict:
+        """
+        Adds the `UserAPIKeyAuth` metadata to the request metadata.
+
+        ignore any sensitive fields like logging, api_key, etc.
+        """
+        from litellm.proxy._types import (
+            LiteLLM_ManagementEndpoint_MetadataFields,
+            LiteLLM_ManagementEndpoint_MetadataFields_Premium,
+        )
+
+        # ignore any special fields
+        added_metadata = {}
+        for k, v in management_endpoint_metadata.items():
+            if k not in (
+                LiteLLM_ManagementEndpoint_MetadataFields_Premium
+                + LiteLLM_ManagementEndpoint_MetadataFields
+            ):
+                added_metadata[k] = v
+        data[_metadata_variable_name].setdefault(
+            "user_api_key_auth_metadata", {}
+        ).update(added_metadata)
+        return data
+
     @staticmethod
     def add_key_level_controls(
         key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
@@ -651,6 +685,13 @@ def add_key_level_controls(
                 key_metadata["disable_fallbacks"], bool
             ):
                 data["disable_fallbacks"] = key_metadata["disable_fallbacks"]
+
+        ## KEY-LEVEL METADATA
+        data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+            data=data,
+            management_endpoint_metadata=key_metadata,
+            _metadata_variable_name=_metadata_variable_name,
+        )
         return data
 
     @staticmethod
@@ -889,6 +930,15 @@ async def add_litellm_data_to_request(  # noqa: PLR0915
             "spend_logs_metadata"
         ]
 
+        ## TEAM-LEVEL METADATA
+        data = (
+            LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+                data=data,
+                management_endpoint_metadata=team_metadata,
+                _metadata_variable_name=_metadata_variable_name,
+            )
+        )
+
         # Team spend, budget - used by prometheus.py
         data[_metadata_variable_name][
             "user_api_key_team_max_budget"
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index e5786e50a5de..c8de97bba203 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -123,12 +123,18 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     max_output_tokens: Required[Optional[int]]
     input_cost_per_token: Required[float]
     input_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    input_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    input_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     cache_creation_input_token_cost: Optional[float]
     cache_creation_input_token_cost_above_1hr: Optional[float]
     cache_read_input_token_cost: Optional[float]
-    cache_read_input_token_cost_flex: Optional[float]  # OpenAI flex service tier pricing
-    cache_read_input_token_cost_priority: Optional[float]  # OpenAI priority service tier pricing
+    cache_read_input_token_cost_flex: Optional[
+        float
+    ]  # OpenAI flex service tier pricing
+    cache_read_input_token_cost_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     input_cost_per_character: Optional[float]  # only for vertex ai models
     input_cost_per_audio_token: Optional[float]
     input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
@@ -147,7 +153,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     output_cost_per_token_batches: Optional[float]
     output_cost_per_token: Required[float]
     output_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    output_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    output_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     output_cost_per_character: Optional[float]  # only for vertex ai models
     output_cost_per_audio_token: Optional[float]
     output_cost_per_token_above_128k_tokens: Optional[
@@ -1856,6 +1864,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
     user_api_key_team_alias: Optional[str]
     user_api_key_end_user_id: Optional[str]
     user_api_key_request_route: Optional[str]
+    user_api_key_auth_metadata: Optional[Dict[str, str]]
 
 
 class StandardLoggingMCPToolCall(TypedDict, total=False):
@@ -2059,10 +2068,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
 
 StandardLoggingPayloadStatus = Literal["success", "failure"]
 
+
 class CachingDetails(TypedDict):
     """
     Track all caching related metrics, fields for a given request
     """
+
     cache_hit: Optional[bool]
     """
     Whether the request hit the cache
@@ -2072,12 +2083,16 @@ class CachingDetails(TypedDict):
     Duration for reading from cache
     """
 
+
 class CostBreakdown(TypedDict):
     """
     Detailed cost breakdown for a request
     """
+
     input_cost: float  # Cost of input/prompt tokens
-    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
+    output_cost: (
+        float  # Cost of output/completion tokens (includes reasoning if applicable)
+    )
     total_cost: float  # Total cost (input + output + tool usage)
     tool_usage_cost: float  # Cost of usage of built-in tools
 
@@ -2616,6 +2631,7 @@ class SpecialEnums(Enum):
 
 class ServiceTier(Enum):
     """Enum for service tier types used in cost calculations."""
+
     FLEX = "flex"
     PRIORITY = "priority"
 
@@ -2662,13 +2678,14 @@ class CallbacksByType(TypedDict):
 class PriorityReservationSettings(BaseModel):
     """
     Settings for priority-based rate limiting reservation.
-
+
     Defines what priority to assign to keys without explicit priority metadata.
     The priority_reservation mapping is configured separately via litellm.priority_reservation.
     """
+
     default_priority: float = Field(
         default=0.5,
-        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation."
+        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation.",
     )
 
     model_config = ConfigDict(protected_namespaces=())
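A minimal sketch of the end-to-end flow patch 3 wires up, under simplified names: key/team metadata is filtered for reserved control fields, stored on the request as `user_api_key_auth_metadata`, then merged with caller-supplied metadata (key/team values win) before custom metric labels are derived. The reserved-field list below is an abbreviated stand-in for litellm's real `LiteLLM_ManagementEndpoint_MetadataFields` lists.

```python
from typing import Dict, Optional

RESERVED_FIELDS = ["logging", "tags", "guardrails", "prompts"]


def add_auth_metadata(request_metadata: dict, management_metadata: dict) -> dict:
    # Copy key/team metadata, minus proxy control fields, into the logged metadata.
    added = {k: v for k, v in management_metadata.items() if k not in RESERVED_FIELDS}
    existing = request_metadata.get("user_api_key_auth_metadata") or {}
    existing.update(added)
    request_metadata["user_api_key_auth_metadata"] = existing
    return request_metadata


def combined_label_source(
    requester_md: Optional[dict], auth_md: Optional[dict]
) -> Dict[str, str]:
    # Later ** expansion wins: admin-set key/team metadata overrides
    # whatever the caller sent in the request body.
    return {**(requester_md or {}), **(auth_md or {})}


meta = add_auth_metadata({}, {"initiative": "q4-launch", "logging": ["datadog"]})
print(combined_label_source({"env": "dev"}, meta["user_api_key_auth_metadata"]))
# -> {'env': 'dev', 'initiative': 'q4-launch'}
```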
From a1a0e99638ebca998db597649b679a4f1d869a81 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 21:23:25 -0700
Subject: [PATCH 4/4] fix(prometheus.py): working e2e calls w/
 userapikeymetadata
---
 .../litellm_enterprise/integrations/prometheus.py | 11 +++++------
 litellm/proxy/_new_secret_config.yaml             |  2 +-
 litellm/proxy/litellm_pre_call_utils.py           |  8 +++++---
 litellm/types/integrations/prometheus.py          |  8 ++++----
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index b472ccbb357b..3b37e14b8969 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -21,6 +21,7 @@
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.integrations.prometheus import *
+from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import get_end_user_id_for_cost_tracking
 
@@ -2247,12 +2248,10 @@ def prometheus_label_factory(
 
     if enum_values.custom_metadata_labels is not None:
         for key, value in enum_values.custom_metadata_labels.items():
-            if key in supported_enum_labels:
-                filtered_labels[key] = value
-            else:
-                filtered_labels[key] = (
-                    "None"  # this happens for dynamically added metadata labels
-                )
+            # check sanitized key
+            sanitized_key = _sanitize_prometheus_label_name(key)
+            if sanitized_key in supported_enum_labels:
+                filtered_labels[sanitized_key] = value
 
     # Add custom tags if configured
     if enum_values.tags is not None:
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 96d9ea0cc316..5d8052493a15 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -6,4 +6,4 @@ model_list:
 
 litellm_settings:
   callbacks: ["prometheus"]
-  custom_prometheus_metadata_labels: ["metadata.initiative"]
\ No newline at end of file
+  custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
\ No newline at end of file
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 73052d149577..44e26313f952 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -636,9 +636,11 @@ def add_management_endpoint_metadata_to_request_metadata(
                 + LiteLLM_ManagementEndpoint_MetadataFields
             ):
                 added_metadata[k] = v
-        data[_metadata_variable_name].setdefault(
-            "user_api_key_auth_metadata", {}
-        ).update(added_metadata)
+        if data[_metadata_variable_name].get("user_api_key_auth_metadata") is None:
+            data[_metadata_variable_name]["user_api_key_auth_metadata"] = {}
+        data[_metadata_variable_name]["user_api_key_auth_metadata"].update(
+            added_metadata
+        )
         return data
 
     @staticmethod
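Why the `setdefault` call above had to go: the logging metadata is initialized with `user_api_key_auth_metadata=None` earlier in the request flow (see patch 3's `get_sanitized_user_information_from_key`), and `dict.setdefault` only fills in missing keys, so an existing `None` is returned as-is and `.update()` blows up. A short runnable illustration:

```python
# The metadata key already exists, but with value None.
metadata = {"user_api_key_auth_metadata": None}

try:
    # setdefault returns the existing None, not the {} fallback.
    metadata.setdefault("user_api_key_auth_metadata", {}).update({"a": "b"})
except AttributeError as e:
    print(f"setdefault keeps the existing None: {e}")

# The explicit None check from the patch handles both missing and None:
if metadata.get("user_api_key_auth_metadata") is None:
    metadata["user_api_key_auth_metadata"] = {}
metadata["user_api_key_auth_metadata"].update({"a": "b"})
print(metadata)  # {'user_api_key_auth_metadata': {'a': 'b'}}
```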
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
index 9c1a14a830e9..a3dd4dcb1c60 100644
--- a/litellm/types/integrations/prometheus.py
+++ b/litellm/types/integrations/prometheus.py
@@ -426,13 +426,13 @@ class PrometheusMetricLabels:
 
     # Buffer monitoring metrics - these typically don't need additional labels
     litellm_pod_lock_manager_size: List[str] = []
-
+
     litellm_in_memory_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_in_memory_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_spend_update_queue_size: List[str] = []
 
     @staticmethod
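Patch 4 imports `_sanitize_prometheus_label_name` but its body isn't shown in the diff. A plausible sketch, assuming it maps config keys such as "metadata.business-unit" onto Prometheus's required label charset `[a-zA-Z_][a-zA-Z0-9_]*`; the real litellm implementation may differ:

```python
import re


def sanitize_prometheus_label_name(name: str) -> str:
    # Replace every disallowed character with "_", then make sure the
    # result doesn't start with a digit.
    sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    if sanitized and sanitized[0].isdigit():
        sanitized = "_" + sanitized
    return sanitized


print(sanitize_prometheus_label_name("metadata.business-unit"))  # metadata_business_unit
print(sanitize_prometheus_label_name("metadata.initiative"))     # metadata_initiative
```

With this, a config key like "metadata.business-unit" and the declared metric label "metadata_business_unit" compare equal, which is why the `prometheus_label_factory` hunk above checks `sanitized_key in supported_enum_labels` instead of the raw key.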