|
1 | 1 | import asyncio |
2 | 2 | import pytest |
3 | 3 |
|
| 4 | +from typing import Annotated |
| 5 | +from pydantic import Field |
| 6 | + |
4 | 7 | from sentry_sdk.integrations.pydantic_ai import PydanticAIIntegration |
5 | 8 |
|
6 | 9 | from pydantic_ai import Agent |
7 | 10 | from pydantic_ai.models.test import TestModel |
| 11 | +from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior |
8 | 12 |
|
9 | 13 |
|
10 | 14 | @pytest.fixture |
@@ -277,6 +281,157 @@ def add_numbers(a: int, b: int) -> int: |
277 | 281 | assert "add_numbers" in available_tools_str |
278 | 282 |
|
279 | 283 |
|
@pytest.mark.parametrize(
    "handled_tool_call_exceptions",
    [False, True],
)
@pytest.mark.asyncio
async def test_agent_with_tool_model_retry(
    sentry_init, capture_events, test_agent, handled_tool_call_exceptions
):
    """
    Test an agent run in which a tool raises ModelRetry on its first call.

    The tool fails once with ModelRetry and succeeds on the next call, so the
    run is expected to complete and to produce two execute_tool spans.

    When ``handled_tool_call_exceptions`` is True, an error event marked as
    handled is expected ahead of the transaction; when it is False, only the
    transaction is expected.
    """

    retries = 0

    @test_agent.tool_plain
    def add_numbers(a: int, b: int) -> float:
        """Add two numbers together, but raises an exception on the first attempt."""
        nonlocal retries
        if retries == 0:
            retries += 1
            raise ModelRetry(message="Try again with the same arguments.")
        return a + b

    sentry_init(
        integrations=[
            PydanticAIIntegration(
                handled_tool_call_exceptions=handled_tool_call_exceptions
            )
        ],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()

    result = await test_agent.run("What is 5 + 3?")

    assert result is not None

    # The tuple unpacking doubles as an assertion on the number of events.
    if handled_tool_call_exceptions:
        (error, transaction) = events
        assert error["level"] == "error"
        assert error["exception"]["values"][0]["mechanism"]["handled"]
    else:
        (transaction,) = events
    spans = transaction["spans"]

    # Find child span types (invoke_agent is the transaction, not a child span)
    chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"]
    tool_spans = [s for s in spans if s["op"] == "gen_ai.execute_tool"]

    # Both the failed attempt and the retry are inspected below, so require
    # two spans up front; otherwise a missing retry span would surface as an
    # IndexError instead of a readable assertion failure.
    assert len(tool_spans) >= 2

    model_retry_tool_span = tool_spans[0]
    tool_span = tool_spans[1]

    # Attributes expected on both tool spans.
    for span in (model_retry_tool_span, tool_span):
        assert "execute_tool" in span["description"]
        assert span["data"]["gen_ai.operation.name"] == "execute_tool"
        assert span["data"]["gen_ai.tool.type"] == "function"
        assert span["data"]["gen_ai.tool.name"] == "add_numbers"
        assert "gen_ai.tool.input" in span["data"]

    # The second span is additionally expected to carry the tool output.
    assert "gen_ai.tool.output" in tool_span["data"]

    # Check chat spans have available_tools
    for chat_span in chat_spans:
        assert "gen_ai.request.available_tools" in chat_span["data"]
        available_tools_str = chat_span["data"]["gen_ai.request.available_tools"]
        # Available tools is serialized as a string
        assert "add_numbers" in available_tools_str
| 362 | + |
| 363 | + |
@pytest.mark.parametrize(
    "handled_tool_call_exceptions",
    [False, True],
)
@pytest.mark.asyncio
async def test_agent_with_tool_validation_error(
    sentry_init, capture_events, test_agent, handled_tool_call_exceptions
):
    """
    Test an agent run whose tool declares constraints no argument can satisfy.

    Parameter ``a`` is constrained to be both greater than and less than zero,
    and the run is expected to raise UnexpectedModelBehavior.

    When ``handled_tool_call_exceptions`` is True, one additional error event
    marked as handled is expected ahead of the other two events.
    """

    @test_agent.tool_plain
    def add_numbers(a: Annotated[int, Field(gt=0, lt=0)], b: int) -> int:
        """Add two numbers together."""
        return a + b

    integration = PydanticAIIntegration(
        handled_tool_call_exceptions=handled_tool_call_exceptions
    )
    sentry_init(
        integrations=[integration],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()

    result = None
    with pytest.raises(UnexpectedModelBehavior):
        result = await test_agent.run("What is 5 + 3?")

    # The run raised, so the assignment above never happened.
    assert result is None

    # Unpacking also asserts how many events were captured and their order.
    if handled_tool_call_exceptions:
        (error, model_behaviour_error, transaction) = events
        assert error["level"] == "error"
        assert error["exception"]["values"][0]["mechanism"]["handled"]
    else:
        (model_behaviour_error, transaction) = events

    # Group child spans by op (invoke_agent is the transaction, not a child span)
    spans_by_op = {}
    for span in transaction["spans"]:
        spans_by_op.setdefault(span["op"], []).append(span)
    chat_spans = spans_by_op.get("gen_ai.chat", [])
    tool_spans = spans_by_op.get("gen_ai.execute_tool", [])

    # At least one tool span is expected
    assert len(tool_spans) >= 1

    # Inspect the first tool span
    failed_tool_span = tool_spans[0]
    assert "execute_tool" in failed_tool_span["description"]
    tool_data = failed_tool_span["data"]
    assert tool_data["gen_ai.operation.name"] == "execute_tool"
    assert tool_data["gen_ai.tool.type"] == "function"
    assert tool_data["gen_ai.tool.name"] == "add_numbers"
    assert "gen_ai.tool.input" in tool_data

    # Every chat span should list the tool; available tools is serialized as a string
    for chat_span in chat_spans:
        chat_data = chat_span["data"]
        assert "gen_ai.request.available_tools" in chat_data
        assert "add_numbers" in chat_data["gen_ai.request.available_tools"]
| 433 | + |
| 434 | + |
280 | 435 | @pytest.mark.asyncio |
281 | 436 | async def test_agent_with_tools_streaming(sentry_init, capture_events, test_agent): |
282 | 437 | """ |
|
0 commit comments