19 changes: 9 additions & 10 deletions litellm/integrations/opentelemetry.py
@@ -247,12 +247,12 @@ def _init_metrics(self, meter_provider):
metrics.set_meter_provider(meter_provider)

self._operation_duration_histogram = meter.create_histogram(
name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38
name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38
description="GenAI operation duration",
unit="s",
)
self._token_usage_histogram = meter.create_histogram(
name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38
name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38
description="GenAI token usage",
unit="{token}",
)
@@ -480,9 +480,9 @@ def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer:

def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]:
"""Extract dynamic headers from kwargs if available."""
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params")
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params")
)

if not standard_callback_dynamic_params:
return None
@@ -543,7 +543,7 @@ def _handle_success(self, kwargs, response_obj, start_time, end_time):
# 4. Metrics & cost recording
self._record_metrics(kwargs, response_obj, start_time, end_time)

# 5. Semantic logs.
# 5. Semantic logs.
if self.config.enable_events:
self._emit_semantic_logs(kwargs, response_obj, span)

@@ -581,7 +581,6 @@ def _maybe_log_raw_request(

raw_span_name = generation_name if generation_name else RAW_REQUEST_SPAN_NAME


otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
raw_span = otel_tracer.start_span(
name=raw_span_name,
@@ -653,6 +652,7 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
return

from opentelemetry._logs import LogRecord, get_logger

otel_logger = get_logger(LITELLM_LOGGER_NAME)

parent_ctx = span.get_span_context()
@@ -708,7 +708,6 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
)
)


def _create_guardrail_span(
self, kwargs: Optional[dict], context: Optional[Context]
):
@@ -1177,8 +1176,8 @@ def _to_ns(self, dt):
return int(dt.timestamp() * 1e9)

def _get_span_name(self, kwargs):
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {})
litellm_params = kwargs.get("litellm_params") or {}
metadata = litellm_params.get("metadata") or {}
generation_name = metadata.get("generation_name")

if generation_name:
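For context, the `_get_span_name` change above swaps `kwargs.get("litellm_params", {})` for `kwargs.get("litellm_params") or {}`. The `.get()` default only kicks in when the key is absent, not when it is present with an explicit `None`, so the `or {}` form is what actually prevents an `AttributeError`. A minimal sketch of the difference (the example `kwargs` dict is illustrative, not taken from the PR):

```python
# Illustrative example: a callback receives kwargs where "litellm_params"
# exists but is explicitly None.
kwargs = {"litellm_params": None}

# Old pattern: the default {} is NOT used because the key is present,
# so litellm_params ends up as None and .get("metadata") would raise.
litellm_params = kwargs.get("litellm_params", {})
print(litellm_params)  # None

# New pattern: `or {}` also covers the explicit-None case.
litellm_params = kwargs.get("litellm_params") or {}
metadata = litellm_params.get("metadata") or {}
generation_name = metadata.get("generation_name")
print(litellm_params, metadata, generation_name)  # {} {} None
```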
20 changes: 20 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
@@ -4801,6 +4801,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "anthropic",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -4827,6 +4831,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "anthropic",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -19720,6 +19728,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -21086,6 +21098,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"input_cost_per_token_batches": 1.5e-06,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 200000,
@@ -21108,6 +21124,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"input_cost_per_token_batches": 1.5e-06,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 200000,
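The new `*_above_200k_tokens` fields add a second pricing tier for the long-context Claude Sonnet entries. A minimal sketch of how tiered rates like these could be applied, assuming (as with LiteLLM's existing above-128k handling for other providers) that the higher rate covers the whole request once the prompt crosses the 200k-token threshold; the helper and the base output rate are illustrative, not taken from this diff:

```python
# Illustrative only: LiteLLM's real logic lives in its cost calculator.
# Input-side rates mirror the diff's values; the base output rate is an
# assumption, since it is not part of the lines shown above.
PRICING = {
    "input_cost_per_token": 3e-06,
    "output_cost_per_token": 1.5e-05,  # assumed base rate
    "input_cost_per_token_above_200k_tokens": 6e-06,
    "output_cost_per_token_above_200k_tokens": 2.25e-05,
}
THRESHOLD = 200_000


def estimate_cost(prompt_tokens: int, completion_tokens: int) -> float:
    over = prompt_tokens > THRESHOLD
    input_rate = PRICING[
        "input_cost_per_token_above_200k_tokens" if over else "input_cost_per_token"
    ]
    output_rate = PRICING[
        "output_cost_per_token_above_200k_tokens" if over else "output_cost_per_token"
    ]
    return prompt_tokens * input_rate + completion_tokens * output_rate


print(estimate_cost(150_000, 2_000))  # priced at the base tier
print(estimate_cost(250_000, 2_000))  # priced at the above-200k tier
```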
39 changes: 24 additions & 15 deletions litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -325,7 +325,7 @@ async def make_bedrock_api_request(
messages: Optional[List[AllMessageValues]] = None,
response: Optional[Union[Any, litellm.ModelResponse]] = None,
request_data: Optional[dict] = None,
) -> BedrockGuardrailResponse:
) -> Optional[BedrockGuardrailResponse]:
from datetime import datetime

start_time = datetime.now()
@@ -348,13 +348,18 @@ async def make_bedrock_api_request(
if request_data.get("api_key") is not None:
api_key = request_data["api_key"]

## skip content if empty content
if not bedrock_request_data.get("content"):
return

prepared_request = self._prepare_request(
credentials=credentials,
data=bedrock_request_data,
optional_params=self.optional_params,
aws_region_name=aws_region_name,
api_key=api_key,
)

verbose_proxy_logger.debug(
"Bedrock AI request body: %s, url %s, headers: %s",
bedrock_request_data,
@@ -385,7 +390,7 @@ async def make_bedrock_api_request(
)
# Re-raise the exception to maintain existing behavior
raise

#########################################################
# Add guardrail information to request trace
#########################################################
@@ -460,7 +465,7 @@ def _get_bedrock_guardrail_response_status(
) -> GuardrailStatus:
"""
Get the status of the bedrock guardrail response.

Returns:
"success": Content allowed through with no violations
"guardrail_intervened": Content blocked due to policy violations
@@ -469,16 +474,18 @@ def _get_bedrock_guardrail_response_status(
if response.status_code == 200:
if self._check_bedrock_response_for_exception(response):
return "guardrail_failed_to_respond"

# Check if the guardrail would block content
try:
_json_response = response.json()
bedrock_guardrail_response = BedrockGuardrailResponse(**_json_response)
if self._should_raise_guardrail_blocked_exception(bedrock_guardrail_response):
if self._should_raise_guardrail_blocked_exception(
bedrock_guardrail_response
):
return "guardrail_intervened"
except Exception:
pass

return "success"
return "guardrail_failed_to_respond"

@@ -630,6 +637,9 @@ async def async_pre_call_hook(
#########################################################
########## 2. Update the messages with the guardrail response ##########
#########################################################
if bedrock_guardrail_response is None:
return data

data["messages"] = (
self._update_messages_with_updated_bedrock_guardrail_response(
messages=new_messages,
@@ -685,6 +695,9 @@ async def async_moderation_hook(
#########################################################
########## 2. Update the messages with the guardrail response ##########
#########################################################
if bedrock_guardrail_response is None:
return data

data["messages"] = (
self._update_messages_with_updated_bedrock_guardrail_response(
messages=new_messages,
@@ -727,15 +740,6 @@ async def async_post_call_success_hook(
)
return

outputs: List[BedrockGuardrailOutput] = (
response.get("outputs", []) or []
)
if not any(output.get("text") for output in outputs):
verbose_proxy_logger.warning(
"Bedrock AI: not running guardrail. No output text in response"
)
return

#########################################################
########## 1. Make parallel Bedrock API requests ##########
#########################################################
@@ -746,6 +750,9 @@ async def async_post_call_success_hook(
#########################################################
########## 2. Apply masking to response with output guardrail response ##########
#########################################################
if output_content_bedrock is None:
return

self._apply_masking_to_response(
response=response,
bedrock_guardrail_response=output_content_bedrock,
@@ -850,6 +857,8 @@ async def async_post_call_streaming_iterator_hook(
#########################################################################
########## 2. Apply masking to response with output guardrail response ##########
#########################################################################
if output_guardrail_response is None:
return
self._apply_masking_to_response(
response=assembled_model_response,
bedrock_guardrail_response=output_guardrail_response,
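With `make_bedrock_api_request` now typed as `Optional[BedrockGuardrailResponse]` and returning early when there is no content to evaluate, each hook has to short-circuit on a `None` result before updating messages or applying masking. A minimal sketch of that calling pattern, with simplified stand-ins for the real types and hook signatures:

```python
import asyncio
from typing import Optional


class BedrockGuardrailResponse(dict):
    """Simplified stand-in for the response type used in the PR."""


async def make_bedrock_api_request(content: list) -> Optional[BedrockGuardrailResponse]:
    # Skip the Bedrock ApplyGuardrail call entirely when there is nothing to evaluate.
    if not content:
        return None
    # ... build the request, call ApplyGuardrail, parse the response ...
    return BedrockGuardrailResponse(action="NONE")


async def pre_call_hook(data: dict) -> dict:
    guardrail_response = await make_bedrock_api_request(data.get("messages") or [])
    # A None result now means "nothing was checked": pass the request through
    # unchanged instead of failing on attribute access further down.
    if guardrail_response is None:
        return data
    # ... update data["messages"] with any masked content from guardrail_response ...
    return data


print(asyncio.run(pre_call_hook({"messages": []})))  # guardrail skipped
print(asyncio.run(pre_call_hook({"messages": [{"role": "user", "content": "hi"}]})))
```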