Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions enterprise/litellm_enterprise/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import LiteLLM_TeamTable, UserAPIKeyAuth
from litellm.types.integrations.prometheus import *
from litellm.types.integrations.prometheus import _sanitize_prometheus_label_name
from litellm.types.utils import StandardLoggingPayload
from litellm.utils import get_end_user_id_for_cost_tracking

Expand Down Expand Up @@ -794,9 +795,16 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
output_tokens = standard_logging_payload["completion_tokens"]
tokens_used = standard_logging_payload["total_tokens"]
response_cost = standard_logging_payload["response_cost"]
_requester_metadata = standard_logging_payload["metadata"].get(
_requester_metadata: Optional[dict] = standard_logging_payload["metadata"].get(
"requester_metadata"
)
user_api_key_auth_metadata: Optional[dict] = standard_logging_payload[
"metadata"
].get("user_api_key_auth_metadata")
combined_metadata: Dict[str, Any] = {
**(_requester_metadata if _requester_metadata else {}),
**(user_api_key_auth_metadata if user_api_key_auth_metadata else {}),
}
if standard_logging_payload is not None and isinstance(
standard_logging_payload, dict
):
Expand Down Expand Up @@ -828,8 +836,7 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_ti
exception_status=None,
exception_class=None,
custom_metadata_labels=get_custom_labels_from_metadata(
metadata=standard_logging_payload["metadata"].get("requester_metadata")
or {}
metadata=combined_metadata
),
route=standard_logging_payload["metadata"].get(
"user_api_key_request_route"
Expand Down Expand Up @@ -1649,9 +1656,22 @@ def set_litellm_deployment_state(
api_base: Optional[str],
api_provider: str,
):
self.litellm_deployment_state.labels(
litellm_model_name, model_id, api_base, api_provider
).set(state)
"""
Set the deployment state.
"""
### get labels
_labels = prometheus_label_factory(
supported_enum_labels=self.get_labels_for_metric(
metric_name="litellm_deployment_state"
),
enum_values=UserAPIKeyLabelValues(
litellm_model_name=litellm_model_name,
model_id=model_id,
api_base=api_base,
api_provider=api_provider,
),
)
self.litellm_deployment_state.labels(**_labels).set(state)

def set_deployment_healthy(
self,
Expand Down Expand Up @@ -2228,8 +2248,10 @@ def prometheus_label_factory(

if enum_values.custom_metadata_labels is not None:
for key, value in enum_values.custom_metadata_labels.items():
if key in supported_enum_labels:
filtered_labels[key] = value
# check sanitized key
sanitized_key = _sanitize_prometheus_label_name(key)
if sanitized_key in supported_enum_labels:
filtered_labels[sanitized_key] = value

# Add custom tags if configured
if enum_values.tags is not None:
Expand Down
2 changes: 2 additions & 0 deletions litellm/litellm_core_utils/litellm_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -4040,6 +4040,7 @@ def get_standard_logging_metadata(
usage_object=usage_object,
requester_custom_headers=None,
cold_storage_object_key=None,
user_api_key_auth_metadata=None,
)
if isinstance(metadata, dict):
# Filter the metadata dictionary to include only the specified keys
Expand Down Expand Up @@ -4755,6 +4756,7 @@ def get_standard_logging_metadata(
requester_custom_headers=None,
user_api_key_request_route=None,
cold_storage_object_key=None,
user_api_key_auth_metadata=None,
)
if isinstance(metadata, dict):
# Update the clean_metadata with values from input metadata that match StandardLoggingMetadata fields
Expand Down
33 changes: 6 additions & 27 deletions litellm/proxy/_new_secret_config.yaml
Original file line number Diff line number Diff line change
@@ -1,30 +1,9 @@
model_list:
- model_name: byok-fixed-gpt-4o-mini
- model_name: openai/gpt-4o
litellm_params:
model: openai/gpt-4o-mini
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy
- model_name: "byok-wildcard/*"
litellm_params:
model: openai/*
- model_name: xai-grok-3
litellm_params:
model: xai/grok-3
- model_name: hosted_vllm/whisper-v3
litellm_params:
model: hosted_vllm/whisper-v3
api_base: "https://webhook.site/2f385e05-00aa-402b-86d1-efc9261471a5"
api_key: dummy

mcp_servers:
github_mcp:
url: "https://api.githubcopilot.com/mcp"
auth_type: oauth2
authorization_url: https://github.com/login/oauth/authorize
token_url: https://github.com/login/oauth/access_token
client_id: os.environ/GITHUB_OAUTH_CLIENT_ID
client_secret: os.environ/GITHUB_OAUTH_CLIENT_SECRET
scopes: ["public_repo", "user:email"]
allowed_tools: ["list_tools"]
# disallowed_tools: ["repo_delete"]
model: openai/gpt-4o
api_key: os.environ/OPENAI_API_KEY

litellm_settings:
callbacks: ["prometheus"]
custom_prometheus_metadata_labels: ["metadata.initiative", "metadata.business-unit"]
1 change: 1 addition & 0 deletions litellm/proxy/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3068,6 +3068,7 @@ class PassThroughEndpointLoggingTypedDict(TypedDict):
"tags",
"team_member_key_duration",
"prompts",
"logging",
]


Expand Down
54 changes: 53 additions & 1 deletion litellm/proxy/litellm_pre_call_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,12 @@ def get_sanitized_user_information_from_key(
user_api_key_end_user_id=user_api_key_dict.end_user_id,
user_api_key_user_email=user_api_key_dict.user_email,
user_api_key_request_route=user_api_key_dict.request_route,
user_api_key_budget_reset_at=user_api_key_dict.budget_reset_at.isoformat() if user_api_key_dict.budget_reset_at else None,
user_api_key_budget_reset_at=(
user_api_key_dict.budget_reset_at.isoformat()
if user_api_key_dict.budget_reset_at
else None
),
user_api_key_auth_metadata=None,
)
return user_api_key_logged_metadata

Expand Down Expand Up @@ -607,6 +612,37 @@ def add_user_api_key_auth_to_request_metadata(
)
return data

@staticmethod
def add_management_endpoint_metadata_to_request_metadata(
    data: dict,
    management_endpoint_metadata: dict,
    _metadata_variable_name: str,
) -> dict:
    """
    Merge management-endpoint (key/team) metadata into the request metadata.

    Copies every entry of ``management_endpoint_metadata`` into
    ``data[_metadata_variable_name]["user_api_key_auth_metadata"]``,
    skipping reserved management-only fields (e.g. ``logging``) so that
    sensitive management settings are never forwarded with the request.

    Args:
        data: the request payload; mutated in place and also returned.
        management_endpoint_metadata: metadata stored on the key/team row.
        _metadata_variable_name: name of the metadata dict inside ``data``
            (e.g. ``"metadata"`` or ``"litellm_metadata"``).

    Returns:
        The same ``data`` dict with the filtered metadata merged in.
    """
    from litellm.proxy._types import (
        LiteLLM_ManagementEndpoint_MetadataFields,
        LiteLLM_ManagementEndpoint_MetadataFields_Premium,
    )

    # Build the skip-set once: the original re-concatenated both lists on
    # every iteration, making each membership test O(n).
    reserved_fields = set(
        LiteLLM_ManagementEndpoint_MetadataFields_Premium
        + LiteLLM_ManagementEndpoint_MetadataFields
    )
    added_metadata = {
        k: v
        for k, v in management_endpoint_metadata.items()
        if k not in reserved_fields
    }

    # Guard against callers that have not initialized the metadata dict yet;
    # the original indexed data[_metadata_variable_name] directly and would
    # raise KeyError in that case.
    request_metadata = data.setdefault(_metadata_variable_name, {})
    if request_metadata.get("user_api_key_auth_metadata") is None:
        request_metadata["user_api_key_auth_metadata"] = {}
    request_metadata["user_api_key_auth_metadata"].update(added_metadata)
    return data

@staticmethod
def add_key_level_controls(
key_metadata: Optional[dict], data: dict, _metadata_variable_name: str
Expand Down Expand Up @@ -651,6 +687,13 @@ def add_key_level_controls(
key_metadata["disable_fallbacks"], bool
):
data["disable_fallbacks"] = key_metadata["disable_fallbacks"]

## KEY-LEVEL METADATA
data = LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
data=data,
management_endpoint_metadata=key_metadata,
_metadata_variable_name=_metadata_variable_name,
)
return data

@staticmethod
Expand Down Expand Up @@ -889,6 +932,15 @@ async def add_litellm_data_to_request( # noqa: PLR0915
"spend_logs_metadata"
]

## TEAM-LEVEL METADATA
data = (
LiteLLMProxyRequestSetup.add_management_endpoint_metadata_to_request_metadata(
data=data,
management_endpoint_metadata=team_metadata,
_metadata_variable_name=_metadata_variable_name,
)
)

# Team spend, budget - used by prometheus.py
data[_metadata_variable_name][
"user_api_key_team_max_budget"
Expand Down
58 changes: 31 additions & 27 deletions litellm/proxy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,9 +1395,12 @@ async def post_call_success_hook(
3. /image/generation
4. /files
"""
from litellm.types.guardrails import GuardrailEventHooks

for callback in litellm.callbacks:
try:
guardrail_callbacks: List[CustomGuardrail] = []
other_callbacks: List[CustomLogger] = []
try:
for callback in litellm.callbacks:
_callback: Optional[CustomLogger] = None
if isinstance(callback, str):
_callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
Expand All @@ -1407,36 +1410,37 @@ async def post_call_success_hook(
_callback = callback # type: ignore

if _callback is not None:
if isinstance(_callback, CustomGuardrail):
guardrail_callbacks.append(_callback)
else:
other_callbacks.append(_callback)
############## Handle Guardrails ########################################
#############################################################################
if isinstance(callback, CustomGuardrail):
# Main - V2 Guardrails implementation
from litellm.types.guardrails import GuardrailEventHooks

if (
callback.should_run_guardrail(
data=data, event_type=GuardrailEventHooks.post_call
)
is not True
):
continue
for callback in guardrail_callbacks:
# Main - V2 Guardrails implementation
if (
callback.should_run_guardrail(
data=data, event_type=GuardrailEventHooks.post_call
)
is not True
):
continue

await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)
await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)

############ Handle CustomLogger ###############################
#################################################################
elif isinstance(_callback, CustomLogger):
await _callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict,
data=data,
response=response,
)
except Exception as e:
raise e
############ Handle CustomLogger ###############################
#################################################################
for callback in other_callbacks:
await callback.async_post_call_success_hook(
user_api_key_dict=user_api_key_dict, data=data, response=response
)
except Exception as e:
raise e
return response

async def async_post_call_streaming_hook(
Expand Down
8 changes: 4 additions & 4 deletions litellm/types/integrations/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,13 +426,13 @@ class PrometheusMetricLabels:

# Buffer monitoring metrics - these typically don't need additional labels
litellm_pod_lock_manager_size: List[str] = []

litellm_in_memory_daily_spend_update_queue_size: List[str] = []

litellm_redis_daily_spend_update_queue_size: List[str] = []

litellm_in_memory_spend_update_queue_size: List[str] = []

litellm_redis_spend_update_queue_size: List[str] = []

@staticmethod
Expand Down
9 changes: 8 additions & 1 deletion litellm/types/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,7 @@ class StandardLoggingUserAPIKeyMetadata(TypedDict):
user_api_key_team_alias: Optional[str]
user_api_key_end_user_id: Optional[str]
user_api_key_request_route: Optional[str]
user_api_key_auth_metadata: Optional[Dict[str, str]]


class StandardLoggingMCPToolCall(TypedDict, total=False):
Expand Down Expand Up @@ -2077,10 +2078,12 @@ class StandardLoggingGuardrailInformation(TypedDict, total=False):

StandardLoggingPayloadStatus = Literal["success", "failure"]


class CachingDetails(TypedDict):
"""
Track all caching related metrics, fields for a given request
"""

cache_hit: Optional[bool]
"""
Whether the request hit the cache
Expand All @@ -2090,12 +2093,16 @@ class CachingDetails(TypedDict):
Duration for reading from cache
"""


class CostBreakdown(TypedDict):
"""
Detailed cost breakdown for a request
"""

input_cost: float # Cost of input/prompt tokens
output_cost: float # Cost of output/completion tokens (includes reasoning if applicable)
output_cost: (
float # Cost of output/completion tokens (includes reasoning if applicable)
)
total_cost: float # Total cost (input + output + tool usage)
tool_usage_cost: float # Cost of usage of built-in tools

Expand Down
Loading