diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index a091633d7e28..0a7bdb6b3fd4 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -247,12 +247,12 @@ def _init_metrics(self, meter_provider): metrics.set_meter_provider(meter_provider) self._operation_duration_histogram = meter.create_histogram( - name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38 + name="gen_ai.client.operation.duration", # Replace with semconv constant in otel 1.38 description="GenAI operation duration", unit="s", ) self._token_usage_histogram = meter.create_histogram( - name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38 + name="gen_ai.client.token.usage", # Replace with semconv constant in otel 1.38 description="GenAI token usage", unit="{token}", ) @@ -480,9 +480,9 @@ def get_tracer_to_use_for_request(self, kwargs: dict) -> Tracer: def _get_dynamic_otel_headers_from_kwargs(self, kwargs) -> Optional[dict]: """Extract dynamic headers from kwargs if available.""" - standard_callback_dynamic_params: Optional[ - StandardCallbackDynamicParams - ] = kwargs.get("standard_callback_dynamic_params") + standard_callback_dynamic_params: Optional[StandardCallbackDynamicParams] = ( + kwargs.get("standard_callback_dynamic_params") + ) if not standard_callback_dynamic_params: return None @@ -543,7 +543,7 @@ def _handle_success(self, kwargs, response_obj, start_time, end_time): # 4. Metrics & cost recording self._record_metrics(kwargs, response_obj, start_time, end_time) - # 5. Semantic logs. + # 5. Semantic logs. if self.config.enable_events: self._emit_semantic_logs(kwargs, response_obj, span) @@ -581,7 +581,6 @@ def _maybe_log_raw_request( raw_span_name = generation_name if generation_name else RAW_REQUEST_SPAN_NAME - otel_tracer: Tracer = self.get_tracer_to_use_for_request(kwargs) raw_span = otel_tracer.start_span( name=raw_span_name, @@ -653,6 +652,7 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span): return from opentelemetry._logs import LogRecord, get_logger + otel_logger = get_logger(LITELLM_LOGGER_NAME) parent_ctx = span.get_span_context() @@ -708,7 +708,6 @@ def _emit_semantic_logs(self, kwargs, response_obj, span: Span): ) ) - def _create_guardrail_span( self, kwargs: Optional[dict], context: Optional[Context] ): @@ -1177,8 +1176,8 @@ def _to_ns(self, dt): return int(dt.timestamp() * 1e9) def _get_span_name(self, kwargs): - litellm_params = kwargs.get("litellm_params", {}) - metadata = litellm_params.get("metadata", {}) + litellm_params = kwargs.get("litellm_params") or {} + metadata = litellm_params.get("metadata") or {} generation_name = metadata.get("generation_name") if generation_name: diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index bae7c11e66c1..1b8f7f7c08fc 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -4801,6 +4801,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4827,6 +4831,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -19720,6 +19728,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -21086,6 +21098,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "input_cost_per_token_batches": 1.5e-06, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -21108,6 +21124,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "input_cost_per_token_batches": 1.5e-06, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, diff --git a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py index c88ebe16d998..eae280ab23ba 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py +++ b/litellm/proxy/guardrails/guardrail_hooks/bedrock_guardrails.py @@ -325,7 +325,7 @@ async def make_bedrock_api_request( messages: Optional[List[AllMessageValues]] = None, response: Optional[Union[Any, litellm.ModelResponse]] = None, request_data: Optional[dict] = None, - ) -> BedrockGuardrailResponse: + ) -> Optional[BedrockGuardrailResponse]: from datetime import datetime start_time = datetime.now() @@ -348,6 +348,10 @@ async def make_bedrock_api_request( if request_data.get("api_key") is not None: api_key = request_data["api_key"] + ## skip content if empty content + if not bedrock_request_data.get("content"): + return + prepared_request = self._prepare_request( credentials=credentials, data=bedrock_request_data, @@ -355,6 +359,7 @@ async def make_bedrock_api_request( aws_region_name=aws_region_name, api_key=api_key, ) + verbose_proxy_logger.debug( "Bedrock AI request body: %s, url %s, headers: %s", bedrock_request_data, @@ -385,7 +390,7 @@ async def make_bedrock_api_request( ) # Re-raise the exception to maintain existing behavior raise - + ######################################################### # Add guardrail information to request trace ######################################################### @@ -460,7 +465,7 @@ def _get_bedrock_guardrail_response_status( ) -> GuardrailStatus: """ Get the status of the bedrock guardrail response. - + Returns: "success": Content allowed through with no violations "guardrail_intervened": Content blocked due to policy violations @@ -469,16 +474,18 @@ def _get_bedrock_guardrail_response_status( if response.status_code == 200: if self._check_bedrock_response_for_exception(response): return "guardrail_failed_to_respond" - + # Check if the guardrail would block content try: _json_response = response.json() bedrock_guardrail_response = BedrockGuardrailResponse(**_json_response) - if self._should_raise_guardrail_blocked_exception(bedrock_guardrail_response): + if self._should_raise_guardrail_blocked_exception( + bedrock_guardrail_response + ): return "guardrail_intervened" except Exception: pass - + return "success" return "guardrail_failed_to_respond" @@ -630,6 +637,9 @@ async def async_pre_call_hook( ######################################################### ########## 2. Update the messages with the guardrail response ########## ######################################################### + if bedrock_guardrail_response is None: + return data + data["messages"] = ( self._update_messages_with_updated_bedrock_guardrail_response( messages=new_messages, @@ -685,6 +695,9 @@ async def async_moderation_hook( ######################################################### ########## 2. Update the messages with the guardrail response ########## ######################################################### + if bedrock_guardrail_response is None: + return data + data["messages"] = ( self._update_messages_with_updated_bedrock_guardrail_response( messages=new_messages, @@ -727,15 +740,6 @@ async def async_post_call_success_hook( ) return - outputs: List[BedrockGuardrailOutput] = ( - response.get("outputs", []) or [] - ) - if not any(output.get("text") for output in outputs): - verbose_proxy_logger.warning( - "Bedrock AI: not running guardrail. No output text in response" - ) - return - ######################################################### ########## 1. Make parallel Bedrock API requests ########## ######################################################### @@ -746,6 +750,9 @@ async def async_post_call_success_hook( ######################################################### ########## 2. Apply masking to response with output guardrail response ########## ######################################################### + if output_content_bedrock is None: + return + self._apply_masking_to_response( response=response, bedrock_guardrail_response=output_content_bedrock, @@ -850,6 +857,8 @@ async def async_post_call_streaming_iterator_hook( ######################################################################### ########## 2. Apply masking to response with output guardrail response ########## ######################################################################### + if output_guardrail_response is None: + return self._apply_masking_to_response( response=assembled_model_response, bedrock_guardrail_response=output_guardrail_response, diff --git a/tests/guardrails_tests/test_bedrock_guardrails.py b/tests/guardrails_tests/test_bedrock_guardrails.py index c4d1655594be..a46b7f18db60 100644 --- a/tests/guardrails_tests/test_bedrock_guardrails.py +++ b/tests/guardrails_tests/test_bedrock_guardrails.py @@ -2,6 +2,8 @@ import os import io, asyncio import pytest +import httpx + sys.path.insert(0, os.path.abspath("../..")) import litellm from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrail @@ -9,11 +11,12 @@ from litellm.caching import DualCache from unittest.mock import MagicMock, AsyncMock, patch + @pytest.mark.asyncio async def test_bedrock_guardrails_pii_masking(): # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + guardrail = BedrockGuardrail( guardrailIdentifier="wf0hkdb5x07f", guardrailVersion="DRAFT", @@ -25,29 +28,39 @@ async def test_bedrock_guardrails_pii_masking(): {"role": "user", "content": "Hello, my phone number is +1 412 555 1212"}, {"role": "assistant", "content": "Hello, how can I help you today?"}, {"role": "user", "content": "I need to cancel my order"}, - {"role": "user", "content": "ok, my credit card number is 1234-5678-9012-3456"}, + { + "role": "user", + "content": "ok, my credit card number is 1234-5678-9012-3456", + }, ], } - response = await guardrail.async_moderation_hook( - data=request_data, - user_api_key_dict=mock_user_api_key_dict, - call_type="completion" - ) + try: + response = await guardrail.async_moderation_hook( + data=request_data, + user_api_key_dict=mock_user_api_key_dict, + call_type="completion", + ) + except httpx.HTTPStatusError as e: + print(f"error: {e.response.text}") + raise e print("response after moderation hook", response) if response: # Only assert if response is not None assert response["messages"][0]["content"] == "Hello, my phone number is {PHONE}" assert response["messages"][1]["content"] == "Hello, how can I help you today?" assert response["messages"][2]["content"] == "I need to cancel my order" - assert response["messages"][3]["content"] == "ok, my credit card number is {CREDIT_DEBIT_CARD_NUMBER}" + assert ( + response["messages"][3]["content"] + == "ok, my credit card number is {CREDIT_DEBIT_CARD_NUMBER}" + ) @pytest.mark.asyncio async def test_bedrock_guardrails_pii_masking_content_list(): # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + guardrail = BedrockGuardrail( guardrailIdentifier="wf0hkdb5x07f", guardrailVersion="DRAFT", @@ -56,35 +69,41 @@ async def test_bedrock_guardrails_pii_masking_content_list(): request_data = { "model": "gpt-4o", "messages": [ - {"role": "user", "content": [ - {"type": "text", "text": "Hello, my phone number is +1 412 555 1212"}, - {"type": "text", "text": "what time is it?"}, - ]}, - {"role": "assistant", "content": "Hello, how can I help you today?"}, { "role": "user", - "content": "who is the president of the united states?" - } + "content": [ + { + "type": "text", + "text": "Hello, my phone number is +1 412 555 1212", + }, + {"type": "text", "text": "what time is it?"}, + ], + }, + {"role": "assistant", "content": "Hello, how can I help you today?"}, + {"role": "user", "content": "who is the president of the united states?"}, ], } response = await guardrail.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) print(response) - + if response: # Only assert if response is not None # Verify that the list content is properly masked assert isinstance(response["messages"][0]["content"], list) - assert response["messages"][0]["content"][0]["text"] == "Hello, my phone number is {PHONE}" + assert ( + response["messages"][0]["content"][0]["text"] + == "Hello, my phone number is {PHONE}" + ) assert response["messages"][0]["content"][1]["text"] == "what time is it?" assert response["messages"][1]["content"] == "Hello, how can I help you today?" - assert response["messages"][2]["content"] == "who is the president of the united states?" - - - + assert ( + response["messages"][2]["content"] + == "who is the president of the united states?" + ) @pytest.mark.asyncio @@ -101,7 +120,7 @@ async def test_bedrock_guardrails_with_streaming(): user_api_key_cache=mock_user_api_key_cache, premium_user=True, ) - + guardrail = BedrockGuardrail( guardrailIdentifier="ff6ujrregl1q", guardrailVersion="DRAFT", @@ -113,14 +132,9 @@ async def test_bedrock_guardrails_with_streaming(): request_data = { "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "Hi I like coffee" - } - ], + "messages": [{"role": "user", "content": "Hi I like coffee"}], "stream": True, - "metadata": {"guardrails": ["bedrock-post-guard"]} + "metadata": {"guardrails": ["bedrock-post-guard"]}, } response = await litellm.acompletion( @@ -132,7 +146,7 @@ async def test_bedrock_guardrails_with_streaming(): response=response, request_data=request_data, ) - + async for chunk in response: print(chunk) @@ -150,7 +164,7 @@ async def test_bedrock_guardrails_with_streaming_no_violation(): user_api_key_cache=mock_user_api_key_cache, premium_user=True, ) - + guardrail = BedrockGuardrail( guardrailIdentifier="ff6ujrregl1q", guardrailVersion="DRAFT", @@ -160,17 +174,11 @@ async def test_bedrock_guardrails_with_streaming_no_violation(): litellm.callbacks.append(guardrail) - request_data = { "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "hi" - } - ], + "messages": [{"role": "user", "content": "hi"}], "stream": True, - "metadata": {"guardrails": ["bedrock-post-guard"]} + "metadata": {"guardrails": ["bedrock-post-guard"]}, } response = await litellm.acompletion( @@ -182,11 +190,10 @@ async def test_bedrock_guardrails_with_streaming_no_violation(): response=response, request_data=request_data, ) - - + async for chunk in response: print(chunk) - + @pytest.mark.asyncio async def test_bedrock_guardrails_streaming_request_body_mock(): @@ -196,7 +203,7 @@ async def test_bedrock_guardrails_streaming_request_body_mock(): from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache from litellm.types.guardrails import GuardrailEventHooks - + # Create mock objects mock_user_api_key_dict = UserAPIKeyAuth() mock_cache = MagicMock(spec=DualCache) @@ -216,79 +223,68 @@ async def test_bedrock_guardrails_streaming_request_body_mock(): litellm.Choices( index=0, message=litellm.Message( - role="assistant", - content="The capital of Spain is Madrid." + role="assistant", content="The capital of Spain is Madrid." ), - finish_reason="stop" + finish_reason="stop", ) ], created=1234567890, model="gpt-4o", - object="chat.completion" + object="chat.completion", ) # Mock Bedrock API response mock_bedrock_response = MagicMock() mock_bedrock_response.status_code = 200 - mock_bedrock_response.json.return_value = { - "action": "NONE", - "outputs": [] - } + mock_bedrock_response.json.return_value = {"action": "NONE", "outputs": []} # Patch the async_handler.post method to capture the request body - with patch.object(guardrail, 'async_handler') as mock_async_handler: + with patch.object(guardrail, "async_handler") as mock_async_handler: mock_async_handler.post = AsyncMock(return_value=mock_bedrock_response) - + # Test data - simulating request data and assembled response request_data = { "model": "gpt-4o", - "messages": [ - { - "role": "user", - "content": "what's the capital of spain?" - } - ], + "messages": [{"role": "user", "content": "what's the capital of spain?"}], "stream": True, - "metadata": {"guardrails": ["bedrock-post-guard"]} + "metadata": {"guardrails": ["bedrock-post-guard"]}, } # Call the method that should make the Bedrock API request await guardrail.make_bedrock_api_request( - source="OUTPUT", - response=mock_response, - request_data=request_data + source="OUTPUT", response=mock_response, request_data=request_data ) # Verify the API call was made mock_async_handler.post.assert_called_once() - + # Get the request data that was passed call_args = mock_async_handler.post.call_args - + # The data should be in the 'data' parameter of the prepared request # We need to parse the JSON from the prepared request body - prepared_request_body = call_args.kwargs.get('data') - + prepared_request_body = call_args.kwargs.get("data") + # Parse the JSON body if isinstance(prepared_request_body, bytes): - actual_body = json.loads(prepared_request_body.decode('utf-8')) + actual_body = json.loads(prepared_request_body.decode("utf-8")) else: actual_body = json.loads(prepared_request_body) - + # Expected body based on the convert_to_bedrock_format method behavior expected_body = { - 'source': 'OUTPUT', - 'content': [ - {'text': {'text': 'The capital of Spain is Madrid.'}} - ] + "source": "OUTPUT", + "content": [{"text": {"text": "The capital of Spain is Madrid."}}], } - + print("Actual Bedrock request body:", json.dumps(actual_body, indent=2)) print("Expected Bedrock request body:", json.dumps(expected_body, indent=2)) - + # Assert the request body matches exactly - assert actual_body == expected_body, f"Request body mismatch. Expected: {expected_body}, Got: {actual_body}" - + assert ( + actual_body == expected_body + ), f"Request body mismatch. Expected: {expected_body}, Got: {actual_body}" + @pytest.mark.asyncio async def test_bedrock_guardrail_aws_param_persistence(): @@ -306,23 +302,31 @@ async def test_bedrock_guardrail_aws_param_persistence(): guardrail_name="bedrock-post-guard", ) - with patch.object(guardrail, "get_credentials", wraps=guardrail.get_credentials) as mock_get_creds: + with patch.object( + guardrail, "get_credentials", wraps=guardrail.get_credentials + ) as mock_get_creds: for i in range(3): request_data = { "model": "gpt-4o", - "messages": [ - {"role": "user", "content": f"request {i}"} - ], + "messages": [{"role": "user", "content": f"request {i}"}], "stream": False, - "metadata": {"guardrails": ["bedrock-post-guard"]} + "metadata": {"guardrails": ["bedrock-post-guard"]}, } - with patch.object(guardrail.async_handler, "post", new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: # Configure the mock response properly mock_response = AsyncMock() mock_response.status_code = 200 - mock_response.json = MagicMock(return_value={"action": "NONE", "outputs": []}) + mock_response.json = MagicMock( + return_value={"action": "NONE", "outputs": []} + ) mock_post.return_value = mock_response - await guardrail.make_bedrock_api_request(source="INPUT", messages=request_data.get("messages"), request_data=request_data) + await guardrail.make_bedrock_api_request( + source="INPUT", + messages=request_data.get("messages"), + request_data=request_data, + ) assert mock_get_creds.call_count == 3 for call in mock_get_creds.call_args_list: @@ -332,114 +336,124 @@ async def test_bedrock_guardrail_aws_param_persistence(): assert kwargs["aws_secret_access_key"] == "test-secret-key" assert kwargs["aws_region_name"] == "us-east-1" + @pytest.mark.asyncio async def test_bedrock_guardrail_blocked_vs_anonymized_actions(): """Test that BLOCKED actions raise exceptions but ANONYMIZED actions do not""" from unittest.mock import MagicMock - from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrail - from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrailResponse - + from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockGuardrail, + ) + from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockGuardrailResponse, + ) + guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) - + # Test 1: ANONYMIZED action should NOT raise exception anonymized_response: BedrockGuardrailResponse = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "Hello, my phone number is {PHONE}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [{ - "type": "PHONE", - "match": "+1 412 555 1212", - "action": "ANONYMIZED" - }] + "outputs": [{"text": "Hello, my phone number is {PHONE}"}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "PHONE", + "match": "+1 412 555 1212", + "action": "ANONYMIZED", + } + ] + } } - }] + ], } - - should_raise = guardrail._should_raise_guardrail_blocked_exception(anonymized_response) + + should_raise = guardrail._should_raise_guardrail_blocked_exception( + anonymized_response + ) assert should_raise is False, "ANONYMIZED actions should not raise exceptions" - + # Test 2: BLOCKED action should raise exception blocked_response: BedrockGuardrailResponse = { - "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "I can't provide that information." - }], - "assessments": [{ - "topicPolicy": { - "topics": [{ - "name": "Sensitive Topic", - "type": "DENY", - "action": "BLOCKED" - }] + "action": "GUARDRAIL_INTERVENED", + "outputs": [{"text": "I can't provide that information."}], + "assessments": [ + { + "topicPolicy": { + "topics": [ + {"name": "Sensitive Topic", "type": "DENY", "action": "BLOCKED"} + ] + } } - }] + ], } - + should_raise = guardrail._should_raise_guardrail_blocked_exception(blocked_response) assert should_raise is True, "BLOCKED actions should raise exceptions" - + # Test 3: Mixed actions - should raise if ANY action is BLOCKED mixed_response: BedrockGuardrailResponse = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "I can't provide that information." - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [{ - "type": "PHONE", - "match": "+1 412 555 1212", - "action": "ANONYMIZED" - }] - }, - "topicPolicy": { - "topics": [{ - "name": "Blocked Topic", - "type": "DENY", - "action": "BLOCKED" - }] + "outputs": [{"text": "I can't provide that information."}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "PHONE", + "match": "+1 412 555 1212", + "action": "ANONYMIZED", + } + ] + }, + "topicPolicy": { + "topics": [ + {"name": "Blocked Topic", "type": "DENY", "action": "BLOCKED"} + ] + }, } - }] + ], } - + should_raise = guardrail._should_raise_guardrail_blocked_exception(mixed_response) - assert should_raise is True, "Mixed actions with any BLOCKED should raise exceptions" - + assert ( + should_raise is True + ), "Mixed actions with any BLOCKED should raise exceptions" + # Test 4: NONE action should not raise exception none_response: BedrockGuardrailResponse = { "action": "NONE", "outputs": [], - "assessments": [] + "assessments": [], } - + should_raise = guardrail._should_raise_guardrail_blocked_exception(none_response) assert should_raise is False, "NONE actions should not raise exceptions" - + # Test 5: Test other policy types with BLOCKED actions content_blocked_response: BedrockGuardrailResponse = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "I can't provide that information." - }], - "assessments": [{ - "contentPolicy": { - "filters": [{ - "type": "VIOLENCE", - "confidence": "HIGH", - "action": "BLOCKED" - }] + "outputs": [{"text": "I can't provide that information."}], + "assessments": [ + { + "contentPolicy": { + "filters": [ + {"type": "VIOLENCE", "confidence": "HIGH", "action": "BLOCKED"} + ] + } } - }] + ], } - - should_raise = guardrail._should_raise_guardrail_blocked_exception(content_blocked_response) - assert should_raise is True, "Content policy BLOCKED actions should raise exceptions" + + should_raise = guardrail._should_raise_guardrail_blocked_exception( + content_blocked_response + ) + assert ( + should_raise is True + ), "Content policy BLOCKED actions should raise exceptions" @pytest.mark.asyncio @@ -448,10 +462,10 @@ async def test_bedrock_guardrail_masking_with_anonymized_response(): from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth from litellm.caching import DualCache - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + guardrail = BedrockGuardrail( guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT", @@ -463,18 +477,20 @@ async def test_bedrock_guardrail_masking_with_anonymized_response(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "Hello, my phone number is {PHONE}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [{ - "type": "PHONE", - "match": "+1 412 555 1212", - "action": "ANONYMIZED" - }] + "outputs": [{"text": "Hello, my phone number is {PHONE}"}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "PHONE", + "match": "+1 412 555 1212", + "action": "ANONYMIZED", + } + ] + } } - }] + ], } request_data = { @@ -485,21 +501,28 @@ async def test_bedrock_guardrail_masking_with_anonymized_response(): } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # This should NOT raise an exception since action is ANONYMIZED try: response = await guardrail.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) # Should succeed and return data with masked content assert response is not None - assert response["messages"][0]["content"] == "Hello, my phone number is {PHONE}" + assert ( + response["messages"][0]["content"] + == "Hello, my phone number is {PHONE}" + ) except Exception as e: - pytest.fail(f"Should not raise exception for ANONYMIZED actions, but got: {e}") + pytest.fail( + f"Should not raise exception for ANONYMIZED actions, but got: {e}" + ) @pytest.mark.asyncio @@ -507,10 +530,10 @@ async def test_bedrock_guardrail_uses_masked_output_without_masking_flags(): """Test that masked output from guardrails is used even when masking flags are not enabled""" from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Create guardrail WITHOUT masking flags enabled guardrail = BedrockGuardrail( guardrailIdentifier="test-guardrail", @@ -523,48 +546,56 @@ async def test_bedrock_guardrail_uses_masked_output_without_masking_flags(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "Hello, my phone number is {PHONE} and email is {EMAIL}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [ - { - "type": "PHONE", - "match": "+1 412 555 1212", - "action": "ANONYMIZED" - }, - { - "type": "EMAIL", - "match": "user@example.com", - "action": "ANONYMIZED" - } - ] + "outputs": [{"text": "Hello, my phone number is {PHONE} and email is {EMAIL}"}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "PHONE", + "match": "+1 412 555 1212", + "action": "ANONYMIZED", + }, + { + "type": "EMAIL", + "match": "user@example.com", + "action": "ANONYMIZED", + }, + ] + } } - }] + ], } request_data = { "model": "gpt-4o", "messages": [ - {"role": "user", "content": "Hello, my phone number is +1 412 555 1212 and email is user@example.com"}, + { + "role": "user", + "content": "Hello, my phone number is +1 412 555 1212 and email is user@example.com", + }, ], } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # This should use the masked output even without masking flags response = await guardrail.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) - + # Should use the masked content from guardrail output assert response is not None - assert response["messages"][0]["content"] == "Hello, my phone number is {PHONE} and email is {EMAIL}" + assert ( + response["messages"][0]["content"] + == "Hello, my phone number is {PHONE} and email is {EMAIL}" + ) print("✅ Masked output was applied even without masking flags enabled") @@ -573,10 +604,10 @@ async def test_bedrock_guardrail_response_pii_masking_non_streaming(): """Test that PII masking is applied to response content in non-streaming scenarios""" from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Create guardrail with response masking enabled guardrail = BedrockGuardrail( guardrailIdentifier="test-guardrail", @@ -588,25 +619,29 @@ async def test_bedrock_guardrail_response_pii_masking_non_streaming(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "My credit card number is {CREDIT_DEBIT_CARD_NUMBER} and my phone is {PHONE}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [ - { - "type": "CREDIT_DEBIT_CARD_NUMBER", - "match": "1234-5678-9012-3456", - "action": "ANONYMIZED" - }, - { - "type": "PHONE", - "match": "+1 412 555 1212", - "action": "ANONYMIZED" - } - ] + "outputs": [ + { + "text": "My credit card number is {CREDIT_DEBIT_CARD_NUMBER} and my phone is {PHONE}" + } + ], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "CREDIT_DEBIT_CARD_NUMBER", + "match": "1234-5678-9012-3456", + "action": "ANONYMIZED", + }, + { + "type": "PHONE", + "match": "+1 412 555 1212", + "action": "ANONYMIZED", + }, + ] + } } - }] + ], } # Create a mock response that contains PII @@ -616,15 +651,15 @@ async def test_bedrock_guardrail_response_pii_masking_non_streaming(): litellm.Choices( index=0, message=litellm.Message( - role="assistant", - content="My credit card number is 1234-5678-9012-3456 and my phone is +1 412 555 1212" + role="assistant", + content="My credit card number is 1234-5678-9012-3456 and my phone is +1 412 555 1212", ), - finish_reason="stop" + finish_reason="stop", ) ], created=1234567890, model="gpt-4o", - object="chat.completion" + object="chat.completion", ) request_data = { @@ -635,18 +670,23 @@ async def test_bedrock_guardrail_response_pii_masking_non_streaming(): } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Call the post-call success hook await guardrail.async_post_call_success_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - response=mock_response + response=mock_response, ) - + # Verify that the response content was masked - assert mock_response.choices[0].message.content == "My credit card number is {CREDIT_DEBIT_CARD_NUMBER} and my phone is {PHONE}" + assert ( + mock_response.choices[0].message.content + == "My credit card number is {CREDIT_DEBIT_CARD_NUMBER} and my phone is {PHONE}" + ) print("✓ Non-streaming response PII masking test passed") @@ -656,10 +696,10 @@ async def test_bedrock_guardrail_response_pii_masking_streaming(): from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth from litellm.types.utils import ModelResponseStream - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Create guardrail with response masking enabled guardrail = BedrockGuardrail( guardrailIdentifier="test-guardrail", @@ -671,25 +711,25 @@ async def test_bedrock_guardrail_response_pii_masking_streaming(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "Sure! My email is {EMAIL} and SSN is {US_SSN}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [ - { - "type": "EMAIL", - "match": "john@example.com", - "action": "ANONYMIZED" - }, - { - "type": "US_SSN", - "match": "123-45-6789", - "action": "ANONYMIZED" - } - ] + "outputs": [{"text": "Sure! My email is {EMAIL} and SSN is {US_SSN}"}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "EMAIL", + "match": "john@example.com", + "action": "ANONYMIZED", + }, + { + "type": "US_SSN", + "match": "123-45-6789", + "action": "ANONYMIZED", + }, + ] + } } - }] + ], } # Create mock streaming chunks @@ -701,25 +741,27 @@ async def mock_streaming_response(): litellm.utils.StreamingChoices( index=0, delta=litellm.utils.Delta(content="Sure! My email is "), - finish_reason=None + finish_reason=None, ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" + object="chat.completion.chunk", ), ModelResponseStream( id="test-id", choices=[ litellm.utils.StreamingChoices( index=0, - delta=litellm.utils.Delta(content="john@example.com and SSN is "), - finish_reason=None + delta=litellm.utils.Delta( + content="john@example.com and SSN is " + ), + finish_reason=None, ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" + object="chat.completion.chunk", ), ModelResponseStream( id="test-id", @@ -727,13 +769,13 @@ async def mock_streaming_response(): litellm.utils.StreamingChoices( index=0, delta=litellm.utils.Delta(content="123-45-6789"), - finish_reason="stop" + finish_reason="stop", ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" - ) + object="chat.completion.chunk", + ), ] for chunk in chunks: yield chunk @@ -747,32 +789,37 @@ async def mock_streaming_response(): } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Call the streaming hook masked_stream = guardrail.async_post_call_streaming_iterator_hook( user_api_key_dict=mock_user_api_key_dict, response=mock_streaming_response(), - request_data=request_data + request_data=request_data, ) - + # Collect all chunks from the masked stream masked_chunks = [] async for chunk in masked_stream: masked_chunks.append(chunk) - + # Verify that we got chunks back assert len(masked_chunks) > 0 - + # Reconstruct the full response from chunks to verify masking full_content = "" for chunk in masked_chunks: - if hasattr(chunk, 'choices') and chunk.choices: - if hasattr(chunk.choices[0], 'delta') and chunk.choices[0].delta: - if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content: + if hasattr(chunk, "choices") and chunk.choices: + if hasattr(chunk.choices[0], "delta") and chunk.choices[0].delta: + if ( + hasattr(chunk.choices[0].delta, "content") + and chunk.choices[0].delta.content + ): full_content += chunk.choices[0].delta.content - + # Verify that the reconstructed content contains the masked PII assert "Sure! My email is {EMAIL} and SSN is {US_SSN}" == full_content print("✓ Streaming response PII masking test passed") @@ -781,64 +828,70 @@ async def mock_streaming_response(): @pytest.mark.asyncio async def test_convert_to_bedrock_format_input_source(): """Test convert_to_bedrock_format with INPUT source and mock messages""" - from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrail - from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockRequest + from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockGuardrail, + ) + from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockRequest, + ) from unittest.mock import patch - + # Create the guardrail instance guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) - + # Mock messages mock_messages = [ {"role": "user", "content": "Hello, how are you?"}, {"role": "assistant", "content": "I'm doing well, thank you!"}, - {"role": "user", "content": [ - {"type": "text", "text": "What's the weather like?"}, - {"type": "text", "text": "Is it sunny today?"} - ]} + { + "role": "user", + "content": [ + {"type": "text", "text": "What's the weather like?"}, + {"type": "text", "text": "Is it sunny today?"}, + ], + }, ] - + # Call the method - result = guardrail.convert_to_bedrock_format( - source="INPUT", - messages=mock_messages - ) - + result = guardrail.convert_to_bedrock_format(source="INPUT", messages=mock_messages) + # Verify the result structure assert isinstance(result, dict) assert result.get("source") == "INPUT" assert "content" in result assert isinstance(result.get("content"), list) - + # Verify content items expected_content_items = [ {"text": {"text": "Hello, how are you?"}}, {"text": {"text": "I'm doing well, thank you!"}}, {"text": {"text": "What's the weather like?"}}, - {"text": {"text": "Is it sunny today?"}} + {"text": {"text": "Is it sunny today?"}}, ] - + assert result.get("content") == expected_content_items print("✅ INPUT source test passed - result:", result) -@pytest.mark.asyncio +@pytest.mark.asyncio async def test_convert_to_bedrock_format_output_source(): """Test convert_to_bedrock_format with OUTPUT source and mock ModelResponse""" - from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrail - from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockRequest + from litellm.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockGuardrail, + ) + from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockRequest, + ) import litellm from unittest.mock import patch - - # Create the guardrail instance + + # Create the guardrail instance guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) - + # Mock ModelResponse mock_response = litellm.ModelResponse( id="test-response-id", @@ -846,43 +899,40 @@ async def test_convert_to_bedrock_format_output_source(): litellm.Choices( index=0, message=litellm.Message( - role="assistant", - content="This is a test response from the model." + role="assistant", content="This is a test response from the model." ), - finish_reason="stop" + finish_reason="stop", ), litellm.Choices( - index=1, + index=1, message=litellm.Message( - role="assistant", - content="This is a second choice response." + role="assistant", content="This is a second choice response." ), - finish_reason="stop" - ) + finish_reason="stop", + ), ], created=1234567890, model="gpt-4o", - object="chat.completion" + object="chat.completion", ) - + # Call the method result = guardrail.convert_to_bedrock_format( - source="OUTPUT", - response=mock_response + source="OUTPUT", response=mock_response ) - + # Verify the result structure assert isinstance(result, dict) assert result.get("source") == "OUTPUT" assert "content" in result assert isinstance(result.get("content"), list) - + # Verify content items - should contain both choice contents expected_content_items = [ {"text": {"text": "This is a test response from the model."}}, - {"text": {"text": "This is a second choice response."}} + {"text": {"text": "This is a second choice response."}}, ] - + assert result.get("content") == expected_content_items print("✅ OUTPUT source test passed - result:", result) @@ -894,16 +944,15 @@ async def test_convert_to_bedrock_format_post_call_streaming_hook(): from litellm.proxy._types import UserAPIKeyAuth from litellm.types.utils import ModelResponseStream import litellm - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Create guardrail instance guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) - + # Mock streaming chunks that contain PII async def mock_streaming_response(): chunks = [ @@ -913,12 +962,12 @@ async def mock_streaming_response(): litellm.utils.StreamingChoices( index=0, delta=litellm.utils.Delta(content="My email is "), - finish_reason=None + finish_reason=None, ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" + object="chat.completion.chunk", ), ModelResponseStream( id="test-id", @@ -926,99 +975,121 @@ async def mock_streaming_response(): litellm.utils.StreamingChoices( index=0, delta=litellm.utils.Delta(content="john@example.com"), - finish_reason="stop" + finish_reason="stop", ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" - ) + object="chat.completion.chunk", + ), ] for chunk in chunks: yield chunk - + # Mock Bedrock API response with PII masking mock_bedrock_response = MagicMock() mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "My email is {EMAIL}" - }], - "assessments": [{ - "sensitiveInformationPolicy": { - "piiEntities": [{ - "type": "EMAIL", - "match": "john@example.com", - "action": "ANONYMIZED" - }] + "outputs": [{"text": "My email is {EMAIL}"}], + "assessments": [ + { + "sensitiveInformationPolicy": { + "piiEntities": [ + { + "type": "EMAIL", + "match": "john@example.com", + "action": "ANONYMIZED", + } + ] + } } - }] + ], } - + request_data = { "model": "gpt-4o", - "messages": [ - {"role": "user", "content": "What's your email?"} - ], - "stream": True + "messages": [{"role": "user", "content": "What's your email?"}], + "stream": True, } - + # Track which bedrock API calls were made bedrock_calls = [] - + # Mock the make_bedrock_api_request method to track calls - async def mock_make_bedrock_api_request(source, messages=None, response=None, request_data=None): - bedrock_calls.append({ - "source": source, - "messages": messages, - "response": response, - "request_data": request_data - }) + async def mock_make_bedrock_api_request( + source, messages=None, response=None, request_data=None + ): + bedrock_calls.append( + { + "source": source, + "messages": messages, + "response": response, + "request_data": request_data, + } + ) # Return the mock bedrock response - from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import BedrockGuardrailResponse + from litellm.types.proxy.guardrails.guardrail_hooks.bedrock_guardrails import ( + BedrockGuardrailResponse, + ) + return BedrockGuardrailResponse(**mock_bedrock_response.json()) - + # Patch the bedrock API request method - with patch.object(guardrail, 'make_bedrock_api_request', side_effect=mock_make_bedrock_api_request): - + with patch.object( + guardrail, "make_bedrock_api_request", side_effect=mock_make_bedrock_api_request + ): + # Call the streaming hook result_generator = guardrail.async_post_call_streaming_iterator_hook( user_api_key_dict=mock_user_api_key_dict, response=mock_streaming_response(), - request_data=request_data + request_data=request_data, ) - + # Collect all chunks from the result result_chunks = [] async for chunk in result_generator: result_chunks.append(chunk) - + # Verify bedrock API calls were made - assert len(bedrock_calls) == 2, f"Expected 2 bedrock calls (INPUT and OUTPUT), got {len(bedrock_calls)}" - + assert ( + len(bedrock_calls) == 2 + ), f"Expected 2 bedrock calls (INPUT and OUTPUT), got {len(bedrock_calls)}" + # Find the OUTPUT call output_calls = [call for call in bedrock_calls if call["source"] == "OUTPUT"] - assert len(output_calls) == 1, f"Expected 1 OUTPUT call, got {len(output_calls)}" - + assert ( + len(output_calls) == 1 + ), f"Expected 1 OUTPUT call, got {len(output_calls)}" + output_call = output_calls[0] assert output_call["source"] == "OUTPUT" assert output_call["response"] is not None assert output_call["messages"] is None # OUTPUT calls don't need messages - + # Verify that the response content was masked # The streaming chunks should now contain the masked content full_content = "" for chunk in result_chunks: - if hasattr(chunk, 'choices') and chunk.choices: - if hasattr(chunk.choices[0], 'delta') and chunk.choices[0].delta.content: + if hasattr(chunk, "choices") and chunk.choices: + if ( + hasattr(chunk.choices[0], "delta") + and chunk.choices[0].delta.content + ): full_content += chunk.choices[0].delta.content - + # The content should be masked (contains {EMAIL} instead of john@example.com) - assert "{EMAIL}" in full_content, f"Expected masked content with {{EMAIL}}, got: {full_content}" - assert "john@example.com" not in full_content, f"Original email should be masked, got: {full_content}" - - print("✅ Post-call streaming hook test passed - OUTPUT source used for masking") + assert ( + "{EMAIL}" in full_content + ), f"Expected masked content with {{EMAIL}}, got: {full_content}" + assert ( + "john@example.com" not in full_content + ), f"Original email should be masked, got: {full_content}" + + print( + "✅ Post-call streaming hook test passed - OUTPUT source used for masking" + ) print(f"✅ Bedrock calls made: {[call['source'] for call in bedrock_calls]}") print(f"✅ Final masked content: {full_content}") @@ -1029,13 +1100,12 @@ async def test_bedrock_guardrail_blocked_action_shows_output_text(): from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth from fastapi import HTTPException - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) # Mock the Bedrock API response with BLOCKED action and output text @@ -1043,20 +1113,16 @@ async def test_bedrock_guardrail_blocked_action_shows_output_text(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [ + "outputs": [{"text": "this violates litellm corporate guardrail policy"}], + "assessments": [ { - "text": "this violates litellm corporate guardrail policy" + "topicPolicy": { + "topics": [ + {"name": "Sensitive Topic", "type": "DENY", "action": "BLOCKED"} + ] + } } ], - "assessments": [{ - "topicPolicy": { - "topics": [{ - "name": "Sensitive Topic", - "type": "DENY", - "action": "BLOCKED" - }] - } - }] } request_data = { @@ -1067,32 +1133,36 @@ async def test_bedrock_guardrail_blocked_action_shows_output_text(): } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # This should raise HTTPException due to BLOCKED action with pytest.raises(HTTPException) as exc_info: await guardrail.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) - + # Verify the exception details exception = exc_info.value assert exception.status_code == 400 assert "detail" in exception.__dict__ - + # Check that the detail contains the expected structure detail = exception.detail assert isinstance(detail, dict) assert detail["error"] == "Violated guardrail policy" - + # Verify that the output text from both outputs is included expected_output_text = "this violates litellm corporate guardrail policy" assert detail["bedrock_guardrail_response"] == expected_output_text - - print("✅ BLOCKED action HTTPException test passed - output text properly included") + + print( + "✅ BLOCKED action HTTPException test passed - output text properly included" + ) @pytest.mark.asyncio @@ -1101,13 +1171,12 @@ async def test_bedrock_guardrail_blocked_action_empty_outputs(): from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth from fastapi import HTTPException - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) # Mock the Bedrock API response with BLOCKED action but no outputs @@ -1116,15 +1185,15 @@ async def test_bedrock_guardrail_blocked_action_empty_outputs(): mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", "outputs": [], # Empty outputs - "assessments": [{ - "contentPolicy": { - "filters": [{ - "type": "VIOLENCE", - "confidence": "HIGH", - "action": "BLOCKED" - }] + "assessments": [ + { + "contentPolicy": { + "filters": [ + {"type": "VIOLENCE", "confidence": "HIGH", "action": "BLOCKED"} + ] + } } - }] + ], } request_data = { @@ -1135,27 +1204,29 @@ async def test_bedrock_guardrail_blocked_action_empty_outputs(): } # Patch the async_handler.post method - with patch.object(guardrail.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # This should raise HTTPException due to BLOCKED action with pytest.raises(HTTPException) as exc_info: await guardrail.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) - + # Verify the exception details exception = exc_info.value assert exception.status_code == 400 - + # Check that the detail contains the expected structure with empty output text detail = exception.detail assert isinstance(detail, dict) assert detail["error"] == "Violated guardrail policy" assert detail["bedrock_guardrail_response"] == "" # Empty string for no outputs - + print("✅ BLOCKED action with empty outputs test passed") @@ -1165,15 +1236,15 @@ async def test_bedrock_guardrail_disable_exception_on_block_non_streaming(): from unittest.mock import AsyncMock, MagicMock, patch from litellm.proxy._types import UserAPIKeyAuth from fastapi import HTTPException - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Test 1: disable_exception_on_block=False (default) - should raise exception guardrail_default = BedrockGuardrail( guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT", - disable_exception_on_block=False + disable_exception_on_block=False, ) # Mock the Bedrock API response with BLOCKED action @@ -1181,18 +1252,16 @@ async def test_bedrock_guardrail_disable_exception_on_block_non_streaming(): mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "I can't provide that information." - }], - "assessments": [{ - "topicPolicy": { - "topics": [{ - "name": "Sensitive Topic", - "type": "DENY", - "action": "BLOCKED" - }] + "outputs": [{"text": "I can't provide that information."}], + "assessments": [ + { + "topicPolicy": { + "topics": [ + {"name": "Sensitive Topic", "type": "DENY", "action": "BLOCKED"} + ] + } } - }] + ], } request_data = { @@ -1203,17 +1272,19 @@ async def test_bedrock_guardrail_disable_exception_on_block_non_streaming(): } # Patch the async_handler.post method - with patch.object(guardrail_default.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail_default.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Should raise HTTPException when disable_exception_on_block=False with pytest.raises(HTTPException) as exc_info: await guardrail_default.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) - + # Verify the exception details exception = exc_info.value assert exception.status_code == 400 @@ -1223,24 +1294,28 @@ async def test_bedrock_guardrail_disable_exception_on_block_non_streaming(): guardrail_disabled = BedrockGuardrail( guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT", - disable_exception_on_block=True + disable_exception_on_block=True, ) - with patch.object(guardrail_disabled.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail_disabled.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Should NOT raise exception when disable_exception_on_block=True try: response = await guardrail_disabled.async_moderation_hook( data=request_data, user_api_key_dict=mock_user_api_key_dict, - call_type="completion" + call_type="completion", ) # Should succeed and return data (even though content was blocked) assert response is not None print("✅ No exception raised when disable_exception_on_block=True") except Exception as e: - pytest.fail(f"Should not raise exception when disable_exception_on_block=True, but got: {e}") + pytest.fail( + f"Should not raise exception when disable_exception_on_block=True, but got: {e}" + ) @pytest.mark.asyncio @@ -1251,10 +1326,10 @@ async def test_bedrock_guardrail_disable_exception_on_block_streaming(): from litellm.types.utils import ModelResponseStream from fastapi import HTTPException import litellm - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Mock streaming chunks that would normally trigger a block async def mock_streaming_response(): chunks = [ @@ -1263,13 +1338,15 @@ async def mock_streaming_response(): choices=[ litellm.utils.StreamingChoices( index=0, - delta=litellm.utils.Delta(content="Here's how to make explosives: "), - finish_reason=None + delta=litellm.utils.Delta( + content="Here's how to make explosives: " + ), + finish_reason=None, ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" + object="chat.completion.chunk", ), ModelResponseStream( id="test-id", @@ -1277,62 +1354,62 @@ async def mock_streaming_response(): litellm.utils.StreamingChoices( index=0, delta=litellm.utils.Delta(content="step 1, step 2..."), - finish_reason="stop" + finish_reason="stop", ) ], created=1234567890, model="gpt-4o", - object="chat.completion.chunk" - ) + object="chat.completion.chunk", + ), ] for chunk in chunks: yield chunk - + # Mock Bedrock API response with BLOCKED action mock_bedrock_response = MagicMock() mock_bedrock_response.status_code = 200 mock_bedrock_response.json.return_value = { "action": "GUARDRAIL_INTERVENED", - "outputs": [{ - "text": "I can't provide that information." - }], - "assessments": [{ - "contentPolicy": { - "filters": [{ - "type": "VIOLENCE", - "confidence": "HIGH", - "action": "BLOCKED" - }] + "outputs": [{"text": "I can't provide that information."}], + "assessments": [ + { + "contentPolicy": { + "filters": [ + {"type": "VIOLENCE", "confidence": "HIGH", "action": "BLOCKED"} + ] + } } - }] + ], } - + request_data = { "model": "gpt-4o", - "messages": [ - {"role": "user", "content": "Tell me how to make explosives"} - ], - "stream": True + "messages": [{"role": "user", "content": "Tell me how to make explosives"}], + "stream": True, } # Test 1: disable_exception_on_block=False (default) - should raise exception guardrail_default = BedrockGuardrail( guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT", - disable_exception_on_block=False + disable_exception_on_block=False, ) - with patch.object(guardrail_default.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail_default.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Should raise exception during streaming processing with pytest.raises(HTTPException): - result_generator = guardrail_default.async_post_call_streaming_iterator_hook( - user_api_key_dict=mock_user_api_key_dict, - response=mock_streaming_response(), - request_data=request_data + result_generator = ( + guardrail_default.async_post_call_streaming_iterator_hook( + user_api_key_dict=mock_user_api_key_dict, + response=mock_streaming_response(), + request_data=request_data, + ) ) - + # Try to consume the generator - should raise exception async for chunk in result_generator: pass @@ -1341,31 +1418,40 @@ async def mock_streaming_response(): guardrail_disabled = BedrockGuardrail( guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT", - disable_exception_on_block=True + disable_exception_on_block=True, ) - with patch.object(guardrail_disabled.async_handler, 'post', new_callable=AsyncMock) as mock_post: + with patch.object( + guardrail_disabled.async_handler, "post", new_callable=AsyncMock + ) as mock_post: mock_post.return_value = mock_bedrock_response - + # Should NOT raise exception when disable_exception_on_block=True try: - result_generator = guardrail_disabled.async_post_call_streaming_iterator_hook( - user_api_key_dict=mock_user_api_key_dict, - response=mock_streaming_response(), - request_data=request_data + result_generator = ( + guardrail_disabled.async_post_call_streaming_iterator_hook( + user_api_key_dict=mock_user_api_key_dict, + response=mock_streaming_response(), + request_data=request_data, + ) ) - + # Consume the generator - should succeed without exceptions result_chunks = [] async for chunk in result_generator: result_chunks.append(chunk) - + # Should have received chunks back even though content was blocked assert len(result_chunks) > 0 - print("✅ Streaming completed without exception when disable_exception_on_block=True") - + print( + "✅ Streaming completed without exception when disable_exception_on_block=True" + ) + except Exception as e: - pytest.fail(f"Should not raise exception when disable_exception_on_block=True in streaming, but got: {e}") + pytest.fail( + f"Should not raise exception when disable_exception_on_block=True in streaming, but got: {e}" + ) + @pytest.mark.asyncio async def test_bedrock_guardrail_post_call_success_hook_no_output_text(): @@ -1374,16 +1460,15 @@ async def test_bedrock_guardrail_post_call_success_hook_no_output_text(): from litellm.proxy._types import UserAPIKeyAuth from litellm.types.utils import ModelResponseStream import litellm - + # Create proper mock objects mock_user_api_key_dict = UserAPIKeyAuth() - + # Create guardrail instance guardrail = BedrockGuardrail( - guardrailIdentifier="test-guardrail", - guardrailVersion="DRAFT" + guardrailIdentifier="test-guardrail", guardrailVersion="DRAFT" ) - + # Mock Bedrock API with no output text mock_bedrock_response = MagicMock() mock_bedrock_response.status_code = 200 @@ -1396,29 +1481,27 @@ async def test_bedrock_guardrail_post_call_success_hook_no_output_text(): "toolUse": { "toolUseId": "tooluse_kZJMlvQmRJ6eAyJE5GIl7Q", "name": "top_song", - "input": { - "sign": "WZPZ" - } + "input": {"sign": "WZPZ"}, } } - ] + ], } }, - "stopReason": "tool_use" + "stopReason": "tool_use", } - + data = { "model": "gpt-4o", "messages": [ {"role": "user", "content": "Hello"}, ], - } + } mock_user_api_key_dict = UserAPIKeyAuth() return await guardrail.async_post_call_success_hook( data=data, - response=mock_bedrock_response, + response=mock_bedrock_response, user_api_key_dict=mock_user_api_key_dict, ) # If no error is raised, then the test passes - print("✅ No output text in response test passed") \ No newline at end of file + print("✅ No output text in response test passed")