From 60063202517cc9748cfb2530e93313133a3e5753 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 12:50:21 -0700
Subject: [PATCH 1/4] fix(proxy/utils.py): run guardrails before running other
 logging hooks on "async_post_call_success_hook"

Closes LIT-1152
---
 litellm/proxy/utils.py | 58 ++++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 5b11c25b2bfe..d3d2972abfac 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -1395,9 +1395,12 @@ async def post_call_success_hook(
         3. /image/generation
         4. /files
         """
+        from litellm.types.guardrails import GuardrailEventHooks
 
-        for callback in litellm.callbacks:
-            try:
+        guardrail_callbacks: List[CustomGuardrail] = []
+        other_callbacks: List[CustomLogger] = []
+        try:
+            for callback in litellm.callbacks:
                 _callback: Optional[CustomLogger] = None
                 if isinstance(callback, str):
                     _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
@@ -1407,36 +1410,37 @@ async def post_call_success_hook(
                     _callback = callback  # type: ignore
 
                 if _callback is not None:
+                    if isinstance(_callback, CustomGuardrail):
+                        guardrail_callbacks.append(_callback)
+                    else:
+                        other_callbacks.append(_callback)
 
-                ############## Handle Guardrails ########################################
-                #############################################################################
-                if isinstance(callback, CustomGuardrail):
-                    # Main - V2 Guardrails implementation
-                    from litellm.types.guardrails import GuardrailEventHooks
-
-                    if (
-                        callback.should_run_guardrail(
-                            data=data, event_type=GuardrailEventHooks.post_call
-                        )
-                        is not True
-                    ):
-                        continue
+            ############## Handle Guardrails ########################################
+            #############################################################################
+            for callback in guardrail_callbacks:
+                # Main - V2 Guardrails implementation
+                if (
+                    callback.should_run_guardrail(
+                        data=data, event_type=GuardrailEventHooks.post_call
+                    )
+                    is not True
+                ):
+                    continue
 
-                    await callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict,
+                    data=data,
+                    response=response,
+                )
 
-                ############ Handle CustomLogger ###############################
-                #################################################################
-                elif isinstance(_callback, CustomLogger):
-                    await _callback.async_post_call_success_hook(
-                        user_api_key_dict=user_api_key_dict,
-                        data=data,
-                        response=response,
-                    )
-            except Exception as e:
-                raise e
+            ############ Handle CustomLogger ###############################
+            #################################################################
+            for callback in other_callbacks:
+                await callback.async_post_call_success_hook(
+                    user_api_key_dict=user_api_key_dict, data=data, response=response
+                )
+        except Exception as e:
+            raise e
         return response
 
     async def async_post_call_streaming_hook(
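A minimal standalone sketch of the ordering this patch establishes: guardrails are partitioned out first and run to completion before any plain logging callback sees the response, so a guardrail rejection prevents the request from ever being logged as a success. Class and method names here are simplified stand-ins, not litellm's actual `CustomGuardrail`/`CustomLogger` API.

```python
import asyncio
from typing import List


class Logger:
    async def on_success(self, data: dict, response: dict) -> None:
        print(f"logged: {response['text']}")


class Guardrail(Logger):
    def should_run(self, data: dict) -> bool:
        return data.get("mode") == "strict"

    async def on_success(self, data: dict, response: dict) -> None:
        if "blocked" in response["text"]:
            raise ValueError("guardrail rejected response")


async def post_call_success(callbacks: List[Logger], data: dict, response: dict) -> dict:
    guardrails = [c for c in callbacks if isinstance(c, Guardrail)]
    others = [c for c in callbacks if not isinstance(c, Guardrail)]

    # Phase 1: guardrails may veto. If one raises, the loggers below
    # never run -- this is the reordering the patch introduces.
    for g in guardrails:
        if not g.should_run(data):
            continue
        await g.on_success(data, response)

    # Phase 2: ordinary logging callbacks observe the approved response.
    for o in others:
        await o.on_success(data, response)
    return response


asyncio.run(post_call_success([Logger(), Guardrail()], {"mode": "strict"}, {"text": "ok"}))
```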
From 6ca7752381fe8ac6aae08985879d59efdedae6fc Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 15:46:30 -0700
Subject: [PATCH 2/4] fix(prometheus.py): don't require metadata labels to be
 set for all requests

add a default value if metadata label not set
---
 .../integrations/prometheus.py | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index d3b0aefb86f6..a42f9b642d9a 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -1649,9 +1649,22 @@ def set_litellm_deployment_state(
         api_base: Optional[str],
         api_provider: str,
     ):
-        self.litellm_deployment_state.labels(
-            litellm_model_name, model_id, api_base, api_provider
-        ).set(state)
+        """
+        Set the deployment state.
+        """
+        ### get labels
+        _labels = prometheus_label_factory(
+            supported_enum_labels=self.get_labels_for_metric(
+                metric_name="litellm_deployment_state"
+            ),
+            enum_values=UserAPIKeyLabelValues(
+                litellm_model_name=litellm_model_name,
+                model_id=model_id,
+                api_base=api_base,
+                api_provider=api_provider,
+            ),
+        )
+        self.litellm_deployment_state.labels(**_labels).set(state)
 
     def set_deployment_healthy(
         self,
@@ -2230,6 +2243,10 @@ def prometheus_label_factory(
         for key, value in enum_values.custom_metadata_labels.items():
             if key in supported_enum_labels:
                 filtered_labels[key] = value
+            else:
+                filtered_labels[key] = (
+                    "None"  # this happens for dynamically added metadata labels
+                )
 
     # Add custom tags if configured
    if enum_values.tags is not None:
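Context for the change above: prometheus_client requires every label a metric was declared with to be supplied on every observation, so a request that doesn't carry a given metadata key still has to report something for that label. A minimal sketch of the "default when unset" behaviour, with illustrative label names rather than litellm's actual metric schema:

```python
from typing import Dict, List


def build_labels(declared: List[str], metadata: Dict[str, str]) -> Dict[str, str]:
    # prometheus_client raises ValueError if a declared label is missing,
    # so default every unset key to the string "None" instead of omitting it.
    return {name: metadata.get(name, "None") for name in declared}


declared = ["metadata_initiative", "metadata_business_unit"]
print(build_labels(declared, {"metadata_initiative": "q4-launch"}))
# -> {'metadata_initiative': 'q4-launch', 'metadata_business_unit': 'None'}
```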
From d6800ee706194aeaff40bbacc653b74033a33586 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 17:02:40 -0700
Subject: [PATCH 3/4] feat(prometheus.py): initial working commit of passing
 team/key metadata as prometheus metrics

Closes LIT-1006
---
 .../integrations/prometheus.py                | 12 +++--
 litellm/litellm_core_utils/litellm_logging.py |  2 +
 litellm/proxy/_new_secret_config.yaml         | 33 +++---------
 litellm/proxy/_types.py                       |  1 +
 litellm/proxy/litellm_pre_call_utils.py       | 52 ++++++++++++++++++-
 litellm/types/utils.py                        | 31 ++++++++---
 6 files changed, 93 insertions(+), 38 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index a42f9b642d9a..b472ccbb357b 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -794,9 +794,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
         output_tokens = standard_logging_payload["completion_tokens"]
         tokens_used = standard_logging_payload["total_tokens"]
         response_cost = standard_logging_payload["response_cost"]
-        _requester_metadata = standard_logging_payload["metadata"].get(
+        _requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
             "requester_metadata"
         )
+        user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
+            "metadata"
+        ].get("user_api_key_auth_metadata")
+        combined_metadata: Dict[str, Any] = {
+            **(_requester_metadata if _requester_metadata else {}),
+            **(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
+        }
         if standard_logging_payload is not None and isinstance(
             standard_logging_payload, dict
         ):
@@ -828,8 +835,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
             exception_status=None,
             exception_class=None,
             custom_metadata_labels=get_custom_labels_from_metadata(
-                metadata=standard_logging_payload["metadata"].get("requester_metadata")
-                or {}
+                metadata=combined_metadata
             ),
             route=standard_logging_payload["metadata"].get(
                 "user_api_key_request_route"
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 24449e1bd0f7..46e363c865da 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -4019,6 +4019,7 @@ def get_standard_logging_metadata(
         usage_object=usage_object,
         requester_custom_headers=None,
         cold_storage_object_key=None,
+        user_api_key_auth_metadata=None,
     )
     if isinstance(metadata, dict):
         # Filter the metadata dictionary to include only the specified keys
@@ -4685,6 +4686,7 @@ def get_standard_logging_metadata(
         requester_custom_headers=None,
         user_api_key_request_route=None,
         cold_storage_object_key=None,
+        user_api_key_auth_metadata=None,
     )
     if isinstance(metadata, dict):
         # Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 804cf2cf2cfc..96d9ea0cc316 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,30 +1,9 @@
 model_list:
-  - model_name: byok-fixed-gpt-4o-mini
+  - model_name: openai/gpt-4o
     litellm_params:
-      model: openai/gpt-4o-mini
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-  - model_name: "byok-wildcard/*"
-    litellm_params:
-      model: openai/*
-  - model_name: xai-grok-3
-    litellm_params:
-      model: xai/grok-3
-  - model_name: hosted_vllm/whisper-v3
-    litellm_params:
-      model: hosted_vllm/whisper-v3
-      api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
-      api_key: dummy
-
-mcp_servers:
-  github_mcp:
-    url: "https://api.githubcopilot.com/mcp"
-    auth_type: oauth2
-    authorization_url: https://github.com/login/oauth/authorize
-    token_url: https://github.com/login/oauth/access_token
-    client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
-    client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
-    scopes: ["public_repo", "user:email"]
-    allowed_tools: ["list_tools"]
-    # disallowed_tools: ["repo_delete"]
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+
+litellm_settings:
+  callbacks: ["prometheus"]
+  custom_prometheus_metadata_labels: ["metadata.initiative"]
\ No newline at end of file
diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py
index c5370eb7d70d..00ffae718e16 100644
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@@ -3066,6 +3066,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
     "tags",
     "team_member_key_duration",
     "prompts",
+    "logging",
 ]
 
 
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index e077d0ee923f..73052d149577 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
         user_api_key_end_user_id=user_api_key_dict.end_user_id,
         user_api_key_user_email=user_api_key_dict.user_email,
         user_api_key_request_route=user_api_key_dict.request_route,
-        user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
+        user_api_key_budget_reset_at=(
+            user_api_key_dict.budget_reset_at.isoformat()
+            if user_api_key_dict.budget_reset_at
+            else None
+        ),
+        user_api_key_auth_metadata=None,
     )
     return user_api_key_logged_metadata
 
@@ -607,6 +612,35 @@ def add_user_api_key_auth_to_request_metadata(
         )
         return data
 
+    @staticmethod
+    def add_management_endpoint_metadata_to_request_metadata(
+        data: dict,
+        management_endpoint_metadata: dict,
+        _metadata_variable_name: str,
+    ) -> dict:
+        """
+        Adds the `UserAPIKeyAuth` metadata to the request metadata.
+
+        ignore any sensitive fields like logging, api_key, etc.
+        """
+        from litellm.proxy._types import (
+            LiteLLM_ManagementEndpoint_MetadataFields,
+            LiteLLM_ManagementEndpoint_MetadataFields_Premium,
+        )
+
+        # ignore any special fields
+        added_metadata = {}
+        for k, v in management_endpoint_metadata.items():
+            if k not in (
+                LiteLLM_ManagementEndpoint_MetadataFields_Premium
+                + LiteLLM_ManagementEndpoint_MetadataFields
+            ):
+                added_metadata[k] = v
+        data[_metadata_variable_name].setdefault(
+            "user_api_key_auth_metadata", {}
+        ).update(added_metadata)
+        return data
+
     @staticmethod
     def add_key_level_controls(
         key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
@@ -651,6 +685,13 @@ def add_key_level_controls(
                 key_metadata["disable_fallbacks"], bool
             ):
                 data["disable_fallbacks"] = key_metadata["disable_fallbacks"]
+
+        ## KEY-LEVEL METADATA
+        data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+            data=data,
+            management_endpoint_metadata=key_metadata,
+            _metadata_variable_name=_metadata_variable_name,
+        )
         return data
 
     @staticmethod
@@ -889,6 +930,15 @@ async def add_litellm_data_to_request(  # noqa: PLR0915
             "spend_logs_metadata"
         ]
 
+        ## TEAM-LEVEL METADATA
+        data = (
+            LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
+                data=data,
+                management_endpoint_metadata=team_metadata,
+                _metadata_variable_name=_metadata_variable_name,
+            )
+        )
+
         # Team spend, budget - used by prometheus.py
         data[_metadata_variable_name][
             "user_api_key_team_max_budget"
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index e5786e50a5de..c8de97bba203 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -123,12 +123,18 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     max_output_tokens: Required[Optional[int]]
     input_cost_per_token: Required[float]
     input_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    input_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    input_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     cache_creation_input_token_cost: Optional[float]
     cache_creation_input_token_cost_above_1hr: Optional[float]
     cache_read_input_token_cost: Optional[float]
-    cache_read_input_token_cost_flex: Optional[float]  # OpenAI flex service tier pricing
-    cache_read_input_token_cost_priority: Optional[float]  # OpenAI priority service tier pricing
+    cache_read_input_token_cost_flex: Optional[
+        float
+    ]  # OpenAI flex service tier pricing
+    cache_read_input_token_cost_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     input_cost_per_character: Optional[float]  # only for vertex ai models
     input_cost_per_audio_token: Optional[float]
     input_cost_per_token_above_128k_tokens: Optional[float]  # only for vertex ai models
@@ -147,7 +153,9 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     output_cost_per_token_batches: Optional[float]
     output_cost_per_token: Required[float]
     output_cost_per_token_flex: Optional[float]  # OpenAI flex service tier pricing
-    output_cost_per_token_priority: Optional[float]  # OpenAI priority service tier pricing
+    output_cost_per_token_priority: Optional[
+        float
+    ]  # OpenAI priority service tier pricing
     output_cost_per_character: Optional[float]  # only for vertex ai models
     output_cost_per_audio_token: Optional[float]
     output_cost_per_token_above_128k_tokens: Optional[
@@ -1856,6 +1864,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
     user_api_key_team_alias: Optional[str]
     user_api_key_end_user_id: Optional[str]
     user_api_key_request_route: Optional[str]
+    user_api_key_auth_metadata: Optional[Dict[str, str]]
 
 
 class StandardLoggingMCPToolCall(TypedDict, total=False):
@@ -2059,10 +2068,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):
 
 StandardLoggingPayloadStatus = Literal["success", "failure"]
 
+
 class CachingDetails(TypedDict):
     """
     Track all caching related metrics, fields for a given request
     """
+
     cache_hit: Optional[bool]
     """
     Whether the request hit the cache
@@ -2072,12 +2083,16 @@ class CachingDetails(TypedDict):
     Duration for reading from cache
     """
 
+
 class CostBreakdown(TypedDict):
     """
     Detailed cost breakdown for a request
     """
+
     input_cost: float  # Cost of input/prompt tokens
-    output_cost: float  # Cost of output/completion tokens (includes reasoning if applicable)
+    output_cost: (
+        float  # Cost of output/completion tokens (includes reasoning if applicable)
+    )
     total_cost: float  # Total cost (input + output + tool usage)
     tool_usage_cost: float  # Cost of usage of built-in tools
 
@@ -2616,6 +2631,7 @@ class SpecialEnums(Enum):
 
 class ServiceTier(Enum):
     """Enum for service tier types used in cost calculations."""
+
     FLEX = "flex"
     PRIORITY = "priority"
 
@@ -2662,13 +2678,14 @@ class CallbacksByType(TypedDict):
 class PriorityReservationSettings(BaseModel):
     """
     Settings for priority-based rate limiting reservation.
-
+
     Defines what priority to assign to keys without explicit priority metadata.
     The priority_reservation mapping is configured separately via litellm.priority_reservation.
     """
+
     default_priority: float = Field(
         default=0.5,
-        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation."
+        description="Priority level to assign to API keys without explicit priority metadata. Should match a key in litellm.priority_reservation.",
     )
 
     model_config = ConfigDict(protected_namespaces=())
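A minimal sketch of the end-to-end flow patch 3 wires up, under simplified names: key/team metadata is filtered for reserved control fields, stored on the request as `user_api_key_auth_metadata`, then merged with caller-supplied metadata (key/team values win) before custom metric labels are derived. The reserved-field list below is an abbreviated stand-in for litellm's real `LiteLLM_ManagementEndpoint_MetadataFields` lists.

```python
from typing import Dict, Optional

RESERVED_FIELDS = ["logging", "tags", "guardrails", "prompts"]


def add_auth_metadata(request_metadata: dict, management_metadata: dict) -> dict:
    # Copy key/team metadata, minus proxy control fields, into the logged metadata.
    added = {k: v for k, v in management_metadata.items() if k not in RESERVED_FIELDS}
    existing = request_metadata.get("user_api_key_auth_metadata") or {}
    existing.update(added)
    request_metadata["user_api_key_auth_metadata"] = existing
    return request_metadata


def combined_label_source(
    requester_md: Optional[dict], auth_md: Optional[dict]
) -> Dict[str, str]:
    # Later ** expansion wins: admin-set key/team metadata overrides
    # whatever the caller sent in the request body.
    return {**(requester_md or {}), **(auth_md or {})}


meta = add_auth_metadata({}, {"initiative": "q4-launch", "logging": ["datadog"]})
print(combined_label_source({"env": "dev"}, meta["user_api_key_auth_metadata"]))
# -> {'env': 'dev', 'initiative': 'q4-launch'}
```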
From a1a0e99638ebca998db597649b679a4f1d869a81 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 30 Sep 2025 21:23:25 -0700
Subject: [PATCH 4/4] fix(prometheus.py): working e2e calls w/
 userapikeymetadata
---
 .../litellm_enterprise/integrations/prometheus.py | 11 +++++------
 litellm/proxy/_new_secret_config.yaml             |  2 +-
 litellm/proxy/litellm_pre_call_utils.py           |  8 +++++---
 litellm/types/integrations/prometheus.py          |  8 ++++----
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py
index b472ccbb357b..3b37e14b8969 100644
--- a/enterprise/litellm_enterprise/integrations/prometheus.py
+++ b/enterprise/litellm_enterprise/integrations/prometheus.py
@@ -21,6 +21,7 @@
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
 from litellm.types.integrations.prometheus import *
+from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
 from litellm.types.utils import StandardLoggingPayload
 from litellm.utils import get_end_user_id_for_cost_tracking
 
@@ -2247,12 +2248,10 @@ def prometheus_label_factory(
 
     if enum_values.custom_metadata_labels is not None:
         for key, value in enum_values.custom_metadata_labels.items():
-            if key in supported_enum_labels:
-                filtered_labels[key] = value
-            else:
-                filtered_labels[key] = (
-                    "None"  # this happens for dynamically added metadata labels
-                )
+            # check sanitized key
+            sanitized_key = _sanitize_prometheus_label_name(key)
+            if sanitized_key in supported_enum_labels:
+                filtered_labels[sanitized_key] = value
 
     # Add custom tags if configured
     if enum_values.tags is not None:
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 96d9ea0cc316..5d8052493a15 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -6,4 +6,4 @@ model_list:
 
 litellm_settings:
   callbacks: ["prometheus"]
-  custom_prometheus_metadata_labels: ["metadata.initiative"]
\ No newline at end of file
+  custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
\ No newline at end of file
diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py
index 73052d149577..44e26313f952 100644
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@@ -636,9 +636,11 @@ def add_management_endpoint_metadata_to_request_metadata(
                 + LiteLLM_ManagementEndpoint_MetadataFields
             ):
                 added_metadata[k] = v
-        data[_metadata_variable_name].setdefault(
-            "user_api_key_auth_metadata", {}
-        ).update(added_metadata)
+        if data[_metadata_variable_name].get("user_api_key_auth_metadata") is None:
+            data[_metadata_variable_name]["user_api_key_auth_metadata"] = {}
+        data[_metadata_variable_name]["user_api_key_auth_metadata"].update(
+            added_metadata
+        )
         return data
 
     @staticmethod
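Why the `setdefault` call above had to go: the logging metadata is initialized with `user_api_key_auth_metadata=None` earlier in the request flow (see patch 3's `get_sanitized_user_information_from_key`), and `dict.setdefault` only fills in missing keys, so an existing `None` is returned as-is and `.update()` blows up. A short runnable illustration:

```python
# The metadata key already exists, but with value None.
metadata = {"user_api_key_auth_metadata": None}

try:
    # setdefault returns the existing None, not the {} fallback.
    metadata.setdefault("user_api_key_auth_metadata", {}).update({"a": "b"})
except AttributeError as e:
    print(f"setdefault keeps the existing None: {e}")

# The explicit None check from the patch handles both missing and None:
if metadata.get("user_api_key_auth_metadata") is None:
    metadata["user_api_key_auth_metadata"] = {}
metadata["user_api_key_auth_metadata"].update({"a": "b"})
print(metadata)  # {'user_api_key_auth_metadata': {'a': 'b'}}
```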
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
index 9c1a14a830e9..a3dd4dcb1c60 100644
--- a/litellm/types/integrations/prometheus.py
+++ b/litellm/types/integrations/prometheus.py
@@ -426,13 +426,13 @@ class PrometheusMetricLabels:
 
     # Buffer monitoring metrics - these typically don't need additional labels
     litellm_pod_lock_manager_size: List[str] = []
-
+
     litellm_in_memory_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_daily_spend_update_queue_size: List[str] = []
-
+
     litellm_in_memory_spend_update_queue_size: List[str] = []
-
+
     litellm_redis_spend_update_queue_size: List[str] = []
 
     @staticmethod
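Patch 4 imports `_sanitize_prometheus_label_name` but its body isn't shown in the diff. A plausible sketch, assuming it maps config keys such as "metadata.business-unit" onto Prometheus's required label charset `[a-zA-Z_][a-zA-Z0-9_]*`; the real litellm implementation may differ:

```python
import re


def sanitize_prometheus_label_name(name: str) -> str:
    # Replace every disallowed character with "_", then make sure the
    # result doesn't start with a digit.
    sanitized = re.sub(r"[^a-zA-Z0-9_]", "_", name)
    if sanitized and sanitized[0].isdigit():
        sanitized = "_" + sanitized
    return sanitized


print(sanitize_prometheus_label_name("metadata.business-unit"))  # metadata_business_unit
print(sanitize_prometheus_label_name("metadata.initiative"))     # metadata_initiative
```

With this, a config key like "metadata.business-unit" and the declared metric label "metadata_business_unit" compare equal, which is why the `prometheus_label_factory` hunk above checks `sanitized_key in supported_enum_labels` instead of the raw key.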