Fix input/output message not being redacted when guardrail_trace="enabled_full" (#1072)

leotac · web-flow · commit db671ba0e95a · 2025-10-31T16:37:59.000-04:00
* fix: detect guardrails with trace="enabled_full"

Fix and simplify _find_detected_and_blocked_policy so that it
works correctly even when the guardrail assessments contain
both detected and non-detected filters
(as happens with guardrail_trace="enabled_full").

* test: add bedrock int tests with different guardrail_trace levels

* test: add xfail with guardrail_trace=disabled
diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py
@@ -8,7 +8,7 @@
 import logging
 import os
 import warnings
-from typing import Any, AsyncGenerator, Callable, Iterable, Literal, Optional, Type, TypeVar, Union, cast
+from typing import Any, AsyncGenerator, Callable, Iterable, Literal, Optional, Type, TypeVar, Union, ValuesView, cast
 
 import boto3
 from botocore.config import Config as BotocoreConfig
@@ -878,18 +878,12 @@ def _find_detected_and_blocked_policy(self, input: Any) -> bool:
             if input.get("action") == "BLOCKED" and input.get("detected") and isinstance(input.get("detected"), bool):
                 return True
 
-            # Recursively check all values in the dictionary
-            for value in input.values():
-                if isinstance(value, dict):
-                    return self._find_detected_and_blocked_policy(value)
-                # Handle case where value is a list of dictionaries
-                elif isinstance(value, list):
-                    for item in value:
-                        return self._find_detected_and_blocked_policy(item)
-        elif isinstance(input, list):
-            # Handle case where input is a list of dictionaries
-            for item in input:
-                return self._find_detected_and_blocked_policy(item)
+            # Otherwise, recursively check all values in the dictionary
+            return self._find_detected_and_blocked_policy(input.values())
+
+        elif isinstance(input, (list, ValuesView)):
+            # Handle case where input is a list or dict_values
+            return any(self._find_detected_and_blocked_policy(item) for item in input)
         # Otherwise return False
         return False
 
diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py
@@ -663,6 +663,99 @@ async def test_stream_stream_input_guardrails(
     bedrock_client.converse_stream.assert_called_once_with(**request)
 
 
+@pytest.mark.asyncio
+async def test_stream_stream_input_guardrails_full_trace(
+    bedrock_client, model, messages, tool_spec, model_id, additional_request_fields, alist
+):
+    """Test guardrails are correctly detected also with guardrail_trace="enabled_full".
+    In that case bedrock returns all filters, including those not detected/blocked."""
+    metadata_event = {
+        "metadata": {
+            "usage": {"inputTokens": 0, "outputTokens": 0, "totalTokens": 0},
+            "metrics": {"latencyMs": 245},
+            "trace": {
+                "guardrail": {
+                    "inputAssessment": {
+                        "jrv9qlue4hag": {
+                            "contentPolicy": {
+                                "filters": [
+                                    {
+                                        "action": "NONE",
+                                        "confidence": "NONE",
+                                        "detected": False,
+                                        "filterStrength": "HIGH",
+                                        "type": "SEXUAL",
+                                    },
+                                    {
+                                        "action": "BLOCKED",
+                                        "confidence": "LOW",
+                                        "detected": True,
+                                        "filterStrength": "HIGH",
+                                        "type": "VIOLENCE",
+                                    },
+                                    {
+                                        "action": "NONE",
+                                        "confidence": "NONE",
+                                        "detected": False,
+                                        "filterStrength": "HIGH",
+                                        "type": "HATE",
+                                    },
+                                    {
+                                        "action": "NONE",
+                                        "confidence": "NONE",
+                                        "detected": False,
+                                        "filterStrength": "HIGH",
+                                        "type": "INSULTS",
+                                    },
+                                    {
+                                        "action": "NONE",
+                                        "confidence": "NONE",
+                                        "detected": False,
+                                        "filterStrength": "HIGH",
+                                        "type": "PROMPT_ATTACK",
+                                    },
+                                    {
+                                        "action": "NONE",
+                                        "confidence": "NONE",
+                                        "detected": False,
+                                        "filterStrength": "HIGH",
+                                        "type": "MISCONDUCT",
+                                    },
+                                ]
+                            }
+                        }
+                    }
+                }
+            },
+        }
+    }
+    bedrock_client.converse_stream.return_value = {"stream": [metadata_event]}
+
+    request = {
+        "additionalModelRequestFields": additional_request_fields,
+        "inferenceConfig": {},
+        "modelId": model_id,
+        "messages": messages,
+        "system": [],
+        "toolConfig": {
+            "tools": [{"toolSpec": tool_spec}],
+            "toolChoice": {"auto": {}},
+        },
+    }
+
+    model.update_config(additional_request_fields=additional_request_fields)
+    response = model.stream(messages, [tool_spec])
+
+    tru_chunks = await alist(response)
+    exp_chunks = [
+        {"redactContent": {"redactUserContentMessage": "[User input redacted.]"}},
+        metadata_event,
+    ]
+
+    assert tru_chunks == exp_chunks
+    bedrock_client.converse_stream.assert_called_once_with(**request)
+
+
 @pytest.mark.asyncio
 async def test_stream_stream_output_guardrails(
     bedrock_client, model, messages, tool_spec, model_id, additional_request_fields, alist
diff --git a/tests_integ/test_bedrock_guardrails.py b/tests_integ/test_bedrock_guardrails.py
@@ -100,11 +100,21 @@ def wait_for_guardrail_active(bedrock_client, guardrail_id, max_attempts=10, del
     raise RuntimeError("Guardrail did not become active.")
 
 
-def test_guardrail_input_intervention(boto_session, bedrock_guardrail):
+@pytest.mark.parametrize(
+    "guardrail_trace",
+    [
+        pytest.param("disabled", marks=pytest.mark.xfail(reason='redact fails with trace="disabled"')),
+        "enabled",
+        "enabled_full",
+    ],
+)
+def test_guardrail_input_intervention(boto_session, bedrock_guardrail, guardrail_trace):
     bedrock_model = BedrockModel(
         guardrail_id=bedrock_guardrail,
         guardrail_version="DRAFT",
         boto_session=boto_session,
+        guardrail_trace=guardrail_trace,
+        guardrail_redact_input_message="Redacted.",
     )
 
     agent = Agent(model=bedrock_model, system_prompt="You are a helpful assistant.", callback_handler=None)
@@ -116,6 +126,7 @@ def test_guardrail_input_intervention(boto_session, bedrock_guardrail):
     assert str(response1).strip() == BLOCKED_INPUT
     assert response2.stop_reason != "guardrail_intervened"
     assert str(response2).strip() != BLOCKED_INPUT
+    assert agent.messages[0]["content"][0]["text"] == "Redacted."
 
 
 @pytest.mark.parametrize("processing_mode", ["sync", "async"])
@@ -159,13 +170,15 @@ def test_guardrail_output_intervention(boto_session, bedrock_guardrail, processi
         )
 
 
+@pytest.mark.parametrize("guardrail_trace", ["enabled", "enabled_full"])
 @pytest.mark.parametrize("processing_mode", ["sync", "async"])
-def test_guardrail_output_intervention_redact_output(bedrock_guardrail, processing_mode):
+def test_guardrail_output_intervention_redact_output(bedrock_guardrail, processing_mode, guardrail_trace):
     REDACT_MESSAGE = "Redacted."
     bedrock_model = BedrockModel(
         guardrail_id=bedrock_guardrail,
         guardrail_version="DRAFT",
         guardrail_stream_processing_mode=processing_mode,
+        guardrail_trace=guardrail_trace,
         guardrail_redact_output=True,
         guardrail_redact_output_message=REDACT_MESSAGE,
         region_name="us-east-1",