diff --git a/enterprise/litellm_enterprise/integrations/prometheus.py b/enterprise/litellm_enterprise/integrations/prometheus.py index d3b0aefb86f6..3b37e14b8969 100644 --- a/enterprise/litellm_enterprise/integrations/prometheus.py +++ b/enterprise/litellm_enterprise/integrations/prometheus.py @@ -21,6 +21,7 @@ from litellm.integrations.custom_logger import CustomLogger from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth from litellm.types.integrations.prometheus import * +from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name from litellm.types.utils import StandardLoggingPayload from litellm.utils import get_end_user_id_for_cost_tracking @@ -794,9 +795,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti output_tokens = standard_logging_payload["completion_tokens"] tokens_used = standard_logging_payload["total_tokens"] response_cost = standard_logging_payload["response_cost"] - _requester_metadata = standard_logging_payload["metadata"].get( + _requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get( "requester_metadata" ) + user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[ + "metadata" + ].get("user_api_key_auth_metadata") + combined_metadata: Dict[str, Any] = { + **(_requester_metadata if _requester_metadata else {}), + **(user_api_key_auth_metadata if user_api_key_auth_metadata else {}), + } if standard_logging_payload is not None and isinstance( standard_logging_payload, dict ): @@ -828,8 +836,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti exception_status=None, exception_class=None, custom_metadata_labels=get_custom_labels_from_metadata( - metadata=standard_logging_payload["metadata"].get("requester_metadata") - or {} + metadata=combined_metadata ), route=standard_logging_payload["metadata"].get( "user_api_key_request_route" @@ -1649,9 +1656,22 @@ def set_litellm_deployment_state( api_base: 
Optional[str], api_provider: str, ): - self.litellm_deployment_state.labels( - litellm_model_name, model_id, api_base, api_provider - ).set(state) + """ + Set the deployment state. + """ + ### get labels + _labels = prometheus_label_factory( + supported_enum_labels=self.get_labels_for_metric( + metric_name="litellm_deployment_state" + ), + enum_values=UserAPIKeyLabelValues( + litellm_model_name=litellm_model_name, + model_id=model_id, + api_base=api_base, + api_provider=api_provider, + ), + ) + self.litellm_deployment_state.labels(**_labels).set(state) def set_deployment_healthy( self, @@ -2228,8 +2248,10 @@ def prometheus_label_factory( if enum_values.custom_metadata_labels is not None: for key, value in enum_values.custom_metadata_labels.items(): - if key in supported_enum_labels: - filtered_labels[key] = value + # check sanitized key + sanitized_key = _sanitize_prometheus_label_name(key) + if sanitized_key in supported_enum_labels: + filtered_labels[sanitized_key] = value # Add custom tags if configured if enum_values.tags is not None: diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index b5ab5aeefe35..696c67c44d71 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -4040,6 +4040,7 @@ def get_standard_logging_metadata( usage_object=usage_object, requester_custom_headers=None, cold_storage_object_key=None, + user_api_key_auth_metadata=None, ) if isinstance(metadata, dict): # Filter the metadata dictionary to include only the specified keys @@ -4755,6 +4756,7 @@ def get_standard_logging_metadata( requester_custom_headers=None, user_api_key_request_route=None, cold_storage_object_key=None, + user_api_key_auth_metadata=None, ) if isinstance(metadata, dict): # Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml 
index 804cf2cf2cfc..5d8052493a15 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -1,30 +1,9 @@ model_list: - - model_name: byok-fixed-gpt-4o-mini + - model_name: openai/gpt-4o litellm_params: - model: openai/gpt-4o-mini - api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5" - api_key: dummy - - model_name: "byok-wildcard/*" - litellm_params: - model: openai/* - - model_name: xai-grok-3 - litellm_params: - model: xai/grok-3 - - model_name: hosted_vllm/whisper-v3 - litellm_params: - model: hosted_vllm/whisper-v3 - api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5" - api_key: dummy - -mcp_servers: - github_mcp: - url: "https://api.githubcopilot.com/mcp" - auth_type: oauth2 - authorization_url: https://github.com/login/oauth/authorize - token_url: https://github.com/login/oauth/access_token - client_id: os.environ/GITHUB_OAUTH_CLIENT_ID - client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET - scopes: ["public_repo", "user:email"] - allowed_tools: ["list_tools"] - # disallowed_tools: ["repo_delete"] + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY +litellm_settings: + callbacks: ["prometheus"] + custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"] \ No newline at end of file diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index efe7ff90973c..0d375f363e06 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -3068,6 +3068,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict): "tags", "team_member_key_duration", "prompts", + "logging", ] diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e077d0ee923f..44e26313f952 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key( user_api_key_end_user_id=user_api_key_dict.end_user_id, 
user_api_key_user_email=user_api_key_dict.user_email, user_api_key_request_route=user_api_key_dict.request_route, - user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None, + user_api_key_budget_reset_at=( + user_api_key_dict.budget_reset_at.isoformat() + if user_api_key_dict.budget_reset_at + else None + ), + user_api_key_auth_metadata=None, ) return user_api_key_logged_metadata @@ -607,6 +612,37 @@ def add_user_api_key_auth_to_request_metadata( ) return data + @staticmethod + def add_management_endpoint_metadata_to_request_metadata( + data: dict, + management_endpoint_metadata: dict, + _metadata_variable_name: str, + ) -> dict: + """ + Adds key- or team-level management endpoint metadata to the request metadata, stored under `user_api_key_auth_metadata`. + + Ignores reserved management-endpoint fields (sensitive fields like logging, api_key, etc.). + """ + from litellm.proxy._types import ( + LiteLLM_ManagementEndpoint_MetadataFields, + LiteLLM_ManagementEndpoint_MetadataFields_Premium, + ) + + # ignore any special fields + added_metadata = {} + for k, v in management_endpoint_metadata.items(): + if k not in ( + LiteLLM_ManagementEndpoint_MetadataFields_Premium + + LiteLLM_ManagementEndpoint_MetadataFields + ): + added_metadata[k] = v + if data[_metadata_variable_name].get("user_api_key_auth_metadata") is None: + data[_metadata_variable_name]["user_api_key_auth_metadata"] = {} + data[_metadata_variable_name]["user_api_key_auth_metadata"].update( + added_metadata + ) + return data + @staticmethod def add_key_level_controls( key_metadata: Optional[dict], data: dict, _metadata_variable_name: str @@ -651,6 +687,13 @@ def add_key_level_controls( key_metadata["disable_fallbacks"], bool ): data["disable_fallbacks"] = key_metadata["disable_fallbacks"] + + ## KEY-LEVEL METADATA + data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata( + data=data, + management_endpoint_metadata=key_metadata, + _metadata_variable_name=_metadata_variable_name, + ) return data @staticmethod 
@@ -889,6 +932,15 @@ async def add_litellm_data_to_request( # noqa: PLR0915 "spend_logs_metadata" ] + ## TEAM-LEVEL METADATA + data = ( + LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata( + data=data, + management_endpoint_metadata=team_metadata, + _metadata_variable_name=_metadata_variable_name, + ) + ) + # Team spend, budget - used by prometheus.py data[_metadata_variable_name][ "user_api_key_team_max_budget" diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 5b11c25b2bfe..d3d2972abfac 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -1395,9 +1395,12 @@ async def post_call_success_hook( 3. /image/generation 4. /files """ + from litellm.types.guardrails import GuardrailEventHooks - for callback in litellm.callbacks: - try: + guardrail_callbacks: List[CustomGuardrail] = [] + other_callbacks: List[CustomLogger] = [] + try: + for callback in litellm.callbacks: _callback: Optional[CustomLogger] = None if isinstance(callback, str): _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class( @@ -1407,36 +1410,37 @@ async def post_call_success_hook( _callback = callback # type: ignore if _callback is not None: + if isinstance(_callback, CustomGuardrail): + guardrail_callbacks.append(_callback) + else: + other_callbacks.append(_callback) ############## Handle Guardrails ######################################## ############################################################################# - if isinstance(callback, CustomGuardrail): - # Main - V2 Guardrails implementation - from litellm.types.guardrails import GuardrailEventHooks - if ( - callback.should_run_guardrail( - data=data, event_type=GuardrailEventHooks.post_call - ) - is not True - ): - continue + for callback in guardrail_callbacks: + # Main - V2 Guardrails implementation + if ( + callback.should_run_guardrail( + data=data, event_type=GuardrailEventHooks.post_call + ) + is not True + ): + continue - await 
callback.async_post_call_success_hook( - user_api_key_dict=user_api_key_dict, - data=data, - response=response, - ) + await callback.async_post_call_success_hook( + user_api_key_dict=user_api_key_dict, + data=data, + response=response, + ) - ############ Handle CustomLogger ############################### - ################################################################# - elif isinstance(_callback, CustomLogger): - await _callback.async_post_call_success_hook( - user_api_key_dict=user_api_key_dict, - data=data, - response=response, - ) - except Exception as e: - raise e + ############ Handle CustomLogger ############################### + ################################################################# + for callback in other_callbacks: + await callback.async_post_call_success_hook( + user_api_key_dict=user_api_key_dict, data=data, response=response + ) + except Exception as e: + raise e return response async def async_post_call_streaming_hook( diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py index 9c1a14a830e9..a3dd4dcb1c60 100644 --- a/litellm/types/integrations/prometheus.py +++ b/litellm/types/integrations/prometheus.py @@ -426,13 +426,13 @@ class PrometheusMetricLabels: # Buffer monitoring metrics - these typically don't need additional labels litellm_pod_lock_manager_size: List[str] = [] - + litellm_in_memory_daily_spend_update_queue_size: List[str] = [] - + litellm_redis_daily_spend_update_queue_size: List[str] = [] - + litellm_in_memory_spend_update_queue_size: List[str] = [] - + litellm_redis_spend_update_queue_size: List[str] = [] @staticmethod diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 166706634df3..0da0ba4e93d3 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -1867,6 +1867,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict): user_api_key_team_alias: Optional[str] user_api_key_end_user_id: Optional[str] user_api_key_request_route: Optional[str] + 
user_api_key_auth_metadata: Optional[Dict[str, str]] class StandardLoggingMCPToolCall(TypedDict, total=False): @@ -2077,10 +2078,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False): StandardLoggingPayloadStatus = Literal["success", "failure"] + class CachingDetails(TypedDict): """ Track all caching related metrics, fields for a given request """ + cache_hit: Optional[bool] """ Whether the request hit the cache @@ -2090,12 +2093,16 @@ class CachingDetails(TypedDict): Duration for reading from cache """ + class CostBreakdown(TypedDict): """ Detailed cost breakdown for a request """ + input_cost: float # Cost of input/prompt tokens - output_cost: float # Cost of output/completion tokens (includes reasoning if applicable) + output_cost: ( + float # Cost of output/completion tokens (includes reasoning if applicable) + ) total_cost: float # Total cost (input + output + tool usage) tool_usage_cost: float # Cost of usage of built-in tools