From 600308ec5dbfef89a73d7f81f0dfd515288c4efd Mon Sep 17 00:00:00 2001 From: brucearctor <5032356+brucearctor@users.noreply.github.com> Date: Thu, 19 Mar 2026 23:11:57 -0700 Subject: [PATCH 1/2] feat: support pre-populated session events in SessionInput for eval cases Add an optional 'events' field to SessionInput that accepts a list of Event objects. After create_session, the eval framework appends these events to the session before starting inference. This seeds the session with conversation history so that eval turns run with realistic prior context without being scored. Closes #4896 --- src/google/adk/evaluation/eval_case.py | 9 ++ .../adk/evaluation/evaluation_generator.py | 8 +- .../evaluation/test_evaluation_generator.py | 109 ++++++++++++++++++ 3 files changed, 125 insertions(+), 1 deletion(-) diff --git a/src/google/adk/evaluation/eval_case.py b/src/google/adk/evaluation/eval_case.py index 8560762483..8a89801b62 100644 --- a/src/google/adk/evaluation/eval_case.py +++ b/src/google/adk/evaluation/eval_case.py @@ -23,6 +23,7 @@ from pydantic import model_validator from typing_extensions import TypeAlias +from ..events.event import Event from .app_details import AppDetails from .common import EvalBaseModel from .conversation_scenarios import ConversationScenario @@ -124,6 +125,14 @@ class SessionInput(EvalBaseModel): state: SessionState = Field(default_factory=dict) """The state of the session.""" + events: Optional[list[Event]] = None + """Optional list of events to seed the session with prior conversation history. + + When provided, these events are appended to the session after creation but + before inference begins. This enables eval cases to test agent behavior that + depends on prior context without scoring the context turns. 
+ """ + StaticConversation: TypeAlias = list[Invocation] """A conversation where the user's queries for each invocation are already specified.""" diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py index 725bddc11c..5f217e54ac 100644 --- a/src/google/adk/evaluation/evaluation_generator.py +++ b/src/google/adk/evaluation/evaluation_generator.py @@ -212,13 +212,19 @@ async def _generate_inferences_from_root_agent( user_id = initial_session.user_id if initial_session else "test_user_id" session_id = session_id if session_id else str(uuid.uuid4()) - _ = await session_service.create_session( + session = await session_service.create_session( app_name=app_name, user_id=user_id, state=initial_session.state if initial_session else {}, session_id=session_id, ) + # Seed session with prior conversation history so that eval turns + # run with realistic prior context without being scored. + if initial_session and initial_session.events: + for event in initial_session.events: + await session_service.append_event(session=session, event=event) + if not artifact_service: artifact_service = InMemoryArtifactService() diff --git a/tests/unittests/evaluation/test_evaluation_generator.py b/tests/unittests/evaluation/test_evaluation_generator.py index 29ac75ffb5..36f819d2fc 100644 --- a/tests/unittests/evaluation/test_evaluation_generator.py +++ b/tests/unittests/evaluation/test_evaluation_generator.py @@ -457,3 +457,112 @@ async def mock_generate_inferences_side_effect( mock_generate_inferences.assert_called_once() called_with_content = mock_generate_inferences.call_args.args[3] assert called_with_content.parts[0].text == "message 1" + + @pytest.mark.asyncio + async def test_seed_events_are_appended_to_session(self, mocker, mock_runner): + """Tests that seed events from SessionInput.events are appended after create_session.""" + from google.adk.evaluation.eval_case import SessionInput + + mock_agent = mocker.MagicMock() + 
mock_user_sim = mocker.MagicMock(spec=UserSimulator) + mock_user_sim.get_next_user_message = mocker.AsyncMock( + return_value=NextUserMessage( + status=UserSimulatorStatus.STOP_SIGNAL_DETECTED + ) + ) + + # Use a mocked session service so we can verify that append_event is called + mock_session_service = mocker.MagicMock() + mock_session = mocker.MagicMock() + mock_session_service.create_session = mocker.AsyncMock( + return_value=mock_session + ) + mock_session_service.append_event = mocker.AsyncMock() + + mocker.patch( + "google.adk.evaluation.evaluation_generator.EvaluationGenerator._get_app_details_by_invocation_id" + ) + mocker.patch( + "google.adk.evaluation.evaluation_generator.EvaluationGenerator.convert_events_to_eval_invocations" + ) + + seed_event_1 = Event( + content=types.Content( + parts=[types.Part(text="How do I pay my water bill?")], + role="user", + ), + author="user", + invocation_id="seed-0", + ) + seed_event_2 = Event( + content=types.Content( + parts=[types.Part(text="You can pay by calling ABC.")], + role="model", + ), + author="root_agent", + invocation_id="seed-0", + ) + + initial_session = SessionInput( + app_name="test_app", + user_id="test_user", + events=[seed_event_1, seed_event_2], + ) + + await EvaluationGenerator._generate_inferences_from_root_agent( + root_agent=mock_agent, + user_simulator=mock_user_sim, + initial_session=initial_session, + session_service=mock_session_service, + ) + + # Verify append_event was called for each seed event + assert mock_session_service.append_event.call_count == 2 + mock_session_service.append_event.assert_any_call( + session=mock_session, event=seed_event_1 + ) + mock_session_service.append_event.assert_any_call( + session=mock_session, event=seed_event_2 + ) + + @pytest.mark.asyncio + async def test_no_seed_events_when_events_is_none(self, mocker, mock_runner): + """Tests that append_event is not called when SessionInput.events is None.""" + from google.adk.evaluation.eval_case import SessionInput 
+ + mock_agent = mocker.MagicMock() + mock_user_sim = mocker.MagicMock(spec=UserSimulator) + mock_user_sim.get_next_user_message = mocker.AsyncMock( + return_value=NextUserMessage( + status=UserSimulatorStatus.STOP_SIGNAL_DETECTED + ) + ) + + mock_session_service = mocker.MagicMock() + mock_session = mocker.MagicMock() + mock_session_service.create_session = mocker.AsyncMock( + return_value=mock_session + ) + mock_session_service.append_event = mocker.AsyncMock() + + mocker.patch( + "google.adk.evaluation.evaluation_generator.EvaluationGenerator._get_app_details_by_invocation_id" + ) + mocker.patch( + "google.adk.evaluation.evaluation_generator.EvaluationGenerator.convert_events_to_eval_invocations" + ) + + initial_session = SessionInput( + app_name="test_app", + user_id="test_user", + ) + + await EvaluationGenerator._generate_inferences_from_root_agent( + root_agent=mock_agent, + user_simulator=mock_user_sim, + initial_session=initial_session, + session_service=mock_session_service, + ) + + # Verify append_event was NOT called + mock_session_service.append_event.assert_not_called() From b89b9bb9d1be15606e32d3ed1920f5152fde3c38 Mon Sep 17 00:00:00 2001 From: brucearctor <5032356+brucearctor@users.noreply.github.com> Date: Sat, 21 Mar 2026 21:36:16 -0700 Subject: [PATCH 2/2] refactor: rename SessionInput.events to conversation_history for clarity --- src/google/adk/evaluation/eval_case.py | 2 +- src/google/adk/evaluation/evaluation_generator.py | 4 ++-- tests/unittests/evaluation/test_evaluation_generator.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/google/adk/evaluation/eval_case.py b/src/google/adk/evaluation/eval_case.py index 8a89801b62..09f1dba151 100644 --- a/src/google/adk/evaluation/eval_case.py +++ b/src/google/adk/evaluation/eval_case.py @@ -125,7 +125,7 @@ class SessionInput(EvalBaseModel): state: SessionState = Field(default_factory=dict) """The state of the session.""" - events: Optional[list[Event]] = None + 
conversation_history: Optional[list[Event]] = None """Optional list of events to seed the session with prior conversation history. When provided, these events are appended to the session after creation but diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py index 5f217e54ac..8ecf0f8aca 100644 --- a/src/google/adk/evaluation/evaluation_generator.py +++ b/src/google/adk/evaluation/evaluation_generator.py @@ -221,8 +221,8 @@ async def _generate_inferences_from_root_agent( # Seed session with prior conversation history so that eval turns # run with realistic prior context without being scored. - if initial_session and initial_session.events: - for event in initial_session.events: + if initial_session and initial_session.conversation_history: + for event in initial_session.conversation_history: await session_service.append_event(session=session, event=event) if not artifact_service: diff --git a/tests/unittests/evaluation/test_evaluation_generator.py b/tests/unittests/evaluation/test_evaluation_generator.py index 36f819d2fc..1720dc07b5 100644 --- a/tests/unittests/evaluation/test_evaluation_generator.py +++ b/tests/unittests/evaluation/test_evaluation_generator.py @@ -460,7 +460,7 @@ async def mock_generate_inferences_side_effect( @pytest.mark.asyncio async def test_seed_events_are_appended_to_session(self, mocker, mock_runner): - """Tests that seed events from SessionInput.events are appended after create_session.""" + """Tests that seed events from SessionInput.conversation_history are appended after create_session.""" from google.adk.evaluation.eval_case import SessionInput mock_agent = mocker.MagicMock() @@ -506,7 +506,7 @@ async def test_seed_events_are_appended_to_session(self, mocker, mock_runner): initial_session = SessionInput( app_name="test_app", user_id="test_user", - events=[seed_event_1, seed_event_2], + conversation_history=[seed_event_1, seed_event_2], ) await 
EvaluationGenerator._generate_inferences_from_root_agent( @@ -527,7 +527,7 @@ async def test_seed_events_are_appended_to_session(self, mocker, mock_runner): @pytest.mark.asyncio async def test_no_seed_events_when_events_is_none(self, mocker, mock_runner): - """Tests that append_event is not called when SessionInput.events is None.""" + """Tests that append_event is not called when SessionInput.conversation_history is None.""" from google.adk.evaluation.eval_case import SessionInput mock_agent = mocker.MagicMock()