19 changes: 9 additions & 10 deletions litellm/integrations/opentelemetry.py
@@ -247,12 +247,12 @@ def _init_metrics(self, meter_provider):
metrics.set_meter_provider(meter_provider)

self._operation_duration_histogram = meter.create_histogram(
name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38
name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38
description="GenAI operation duration",
unit="s",
)
self._token_usage_histogram = meter.create_histogram(
name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38
name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38
description="GenAI token usage",
unit="{token}",
)
@@ -480,9 +480,9 @@ def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer:

def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]:
"""Extract dynamic headers from kwargs if available."""
standard_callback_dynamic_params: Optional[
StandardCallbackDynamicParams
] = kwargs.get("standard_callback_dynamic_params")
standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = (
kwargs.get("standard_callback_dynamic_params")
)

if not standard_callback_dynamic_params:
return None
@@ -543,7 +543,7 @@ def _handle_success(self, kwargs, response_obj, start_time, end_time):
# 4. Metrics & cost recording
self._record_metrics(kwargs, response_obj, start_time, end_time)

# 5. Semantic logs.
# 5. Semantic logs.
if self.config.enable_events:
self._emit_semantic_logs(kwargs, response_obj, span)

@@ -581,7 +581,6 @@ def _maybe_log_raw_request(

raw_span_name = generation_name if generation_name else RAW_REQUEST_SPAN_NAME


otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs)
raw_span = otel_tracer.start_span(
name=raw_span_name,
@@ -653,6 +652,7 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
return

from opentelemetry._logs import LogRecord, get_logger

otel_logger = get_logger(LITELLM_LOGGER_NAME)

parent_ctx = span.get_span_context()
@@ -708,7 +708,6 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span):
)
)


def _create_guardrail_span(
self, kwargs: Optional[dict], context: Optional[Context]
):
@@ -1177,8 +1176,8 @@ def _to_ns(self, dt):
return int(dt.timestamp() * 1e9)

def _get_span_name(self, kwargs):
litellm_params = kwargs.get("litellm_params", {})
metadata = litellm_params.get("metadata", {})
litellm_params = kwargs.get("litellm_params") or {}
metadata = litellm_params.get("metadata") or {}
generation_name = metadata.get("generation_name")

if generation_name:
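For context, the `_get_span_name` change above swaps `kwargs.get("litellm_params", {})` for `kwargs.get("litellm_params") or {}`. The `.get()` default only kicks in when the key is absent, not when it is present with an explicit `None`, so the `or {}` form is what actually prevents an `AttributeError`. A minimal sketch of the difference (the example `kwargs` dict is illustrative, not taken from the PR):

```python
# Illustrative example: a callback receives kwargs where "litellm_params"
# exists but is explicitly None.
kwargs = {"litellm_params": None}

# Old pattern: the default {} is NOT used because the key is present,
# so litellm_params ends up as None and .get("metadata") would raise.
litellm_params = kwargs.get("litellm_params", {})
print(litellm_params)  # None

# New pattern: `or {}` also covers the explicit-None case.
litellm_params = kwargs.get("litellm_params") or {}
metadata = litellm_params.get("metadata") or {}
generation_name = metadata.get("generation_name")
print(litellm_params, metadata, generation_name)  # {} {} None
```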
20 changes: 20 additions & 0 deletions litellm/model_prices_and_context_window_backup.json
@@ -4801,6 +4801,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "anthropic",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -4827,6 +4831,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "anthropic",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -19720,6 +19728,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 200000,
"max_output_tokens": 64000,
@@ -21086,6 +21098,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"input_cost_per_token_batches": 1.5e-06,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 200000,
@@ -21108,6 +21124,10 @@
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,
"input_cost_per_token": 3e-06,
"input_cost_per_token_above_200k_tokens": 6e-06,
"output_cost_per_token_above_200k_tokens": 2.25e-05,
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
"input_cost_per_token_batches": 1.5e-06,
"litellm_provider": "vertex_ai-anthropic_models",
"max_input_tokens": 200000,
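The new `*_above_200k_tokens` fields add a second pricing tier for the long-context Claude Sonnet entries. A minimal sketch of how tiered rates like these could be applied, assuming (as with LiteLLM's existing above-128k handling for other providers) that the higher rate covers the whole request once the prompt crosses the 200k-token threshold; the helper and the base output rate are illustrative, not taken from this diff:

```python
# Illustrative only: LiteLLM's real logic lives in its cost calculator.
# Input-side rates mirror the diff's values; the base output rate is an
# assumption, since it is not part of the lines shown above.
PRICING = {
    "input_cost_per_token": 3e-06,
    "output_cost_per_token": 1.5e-05,  # assumed base rate
    "input_cost_per_token_above_200k_tokens": 6e-06,
    "output_cost_per_token_above_200k_tokens": 2.25e-05,
}
THRESHOLD = 200_000


def estimate_cost(prompt_tokens: int, completion_tokens: int) -> float:
    over = prompt_tokens > THRESHOLD
    input_rate = PRICING[
        "input_cost_per_token_above_200k_tokens" if over else "input_cost_per_token"
    ]
    output_rate = PRICING[
        "output_cost_per_token_above_200k_tokens" if over else "output_cost_per_token"
    ]
    return prompt_tokens * input_rate + completion_tokens * output_rate


print(estimate_cost(150_000, 2_000))  # priced at the base tier
print(estimate_cost(250_000, 2_000))  # priced at the above-200k tier
```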
39 changes: 24 additions & 15 deletions litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py
@@ -325,7 +325,7 @@ async def make_bedrock_api_request(
messages: Optional[List[AllMessageValues]] = None,
response: Optional[Union[Any, litellm.ModelResponse]] = None,
request_data: Optional[dict] = None,
) -> BedrockGuardrailResponse:
) -> Optional[BedrockGuardrailResponse]:
from datetime import datetime

start_time = datetime.now()
@@ -348,13 +348,18 @@ async def make_bedrock_api_request(
if request_data.get("api_key") is not None:
api_key = request_data["api_key"]

## skip content if empty content
if not bedrock_request_data.get("content"):
return

prepared_request = self._prepare_request(
credentials=credentials,
data=bedrock_request_data,
optional_params=self.optional_params,
aws_region_name=aws_region_name,
api_key=api_key,
)

verbose_proxy_logger.debug(
"Bedrock AI request body: %s, url %s, headers: %s",
bedrock_request_data,
@@ -385,7 +390,7 @@ async def make_bedrock_api_request(
)
# Re-raise the exception to maintain existing behavior
raise

#########################################################
# Add guardrail information to request trace
#########################################################
@@ -460,7 +465,7 @@ def _get_bedrock_guardrail_response_status(
) -> GuardrailStatus:
"""
Get the status of the bedrock guardrail response.

Returns:
"success": Content allowed through with no violations
"guardrail_intervened": Content blocked due to policy violations
@@ -469,16 +474,18 @@ def _get_bedrock_guardrail_response_status(
if response.status_code == 200:
if self._check_bedrock_response_for_exception(response):
return "guardrail_failed_to_respond"

# Check if the guardrail would block content
try:
_json_response = response.json()
bedrock_guardrail_response = BedrockGuardrailResponse(**_json_response)
if self._should_raise_guardrail_blocked_exception(bedrock_guardrail_response):
if self._should_raise_guardrail_blocked_exception(
bedrock_guardrail_response
):
return "guardrail_intervened"
except Exception:
pass

return "success"
return "guardrail_failed_to_respond"

@@ -630,6 +637,9 @@ async def async_pre_call_hook(
#########################################################
########## 2. Update the messages with the guardrail response ##########
#########################################################
if bedrock_guardrail_response is None:
return data

data["messages"] = (
self._update_messages_with_updated_bedrock_guardrail_response(
messages=new_messages,
@@ -685,6 +695,9 @@ async def async_moderation_hook(
#########################################################
########## 2. Update the messages with the guardrail response ##########
#########################################################
if bedrock_guardrail_response is None:
return data

data["messages"] = (
self._update_messages_with_updated_bedrock_guardrail_response(
messages=new_messages,
@@ -727,15 +740,6 @@ async def async_post_call_success_hook(
)
return

outputs: List[BedrockGuardrailOutput] = (
response.get("outputs", []) or []
)
if not any(output.get("text") for output in outputs):
verbose_proxy_logger.warning(
"Bedrock AI: not running guardrail. No output text in response"
)
return

#########################################################
########## 1. Make parallel Bedrock API requests ##########
#########################################################
@@ -746,6 +750,9 @@ async def async_post_call_success_hook(
#########################################################
########## 2. Apply masking to response with output guardrail response ##########
#########################################################
if output_content_bedrock is None:
return

self._apply_masking_to_response(
response=response,
bedrock_guardrail_response=output_content_bedrock,
@@ -850,6 +857,8 @@ async def async_post_call_streaming_iterator_hook(
#########################################################################
########## 2. Apply masking to response with output guardrail response ##########
#########################################################################
if output_guardrail_response is None:
return
self._apply_masking_to_response(
response=assembled_model_response,
bedrock_guardrail_response=output_guardrail_response,
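With `make_bedrock_api_request` now typed as `Optional[BedrockGuardrailResponse]` and returning early when there is no content to evaluate, each hook has to short-circuit on a `None` result before updating messages or applying masking. A minimal sketch of that calling pattern, with simplified stand-ins for the real types and hook signatures:

```python
import asyncio
from typing import Optional


class BedrockGuardrailResponse(dict):
    """Simplified stand-in for the response type used in the PR."""


async def make_bedrock_api_request(content: list) -> Optional[BedrockGuardrailResponse]:
    # Skip the Bedrock ApplyGuardrail call entirely when there is nothing to evaluate.
    if not content:
        return None
    # ... build the request, call ApplyGuardrail, parse the response ...
    return BedrockGuardrailResponse(action="NONE")


async def pre_call_hook(data: dict) -> dict:
    guardrail_response = await make_bedrock_api_request(data.get("messages") or [])
    # A None result now means "nothing was checked": pass the request through
    # unchanged instead of failing on attribute access further down.
    if guardrail_response is None:
        return data
    # ... update data["messages"] with any masked content from guardrail_response ...
    return data


print(asyncio.run(pre_call_hook({"messages": []})))  # guardrail skipped
print(asyncio.run(pre_call_hook({"messages": [{"role": "user", "content": "hi"}]})))
```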