diff --git a/src/eva/metrics/processor.py b/src/eva/metrics/processor.py index ad4d5d6c..69160e9a 100644 --- a/src/eva/metrics/processor.py +++ b/src/eva/metrics/processor.py @@ -90,16 +90,23 @@ class _TurnExtractionState: # user_speech lands at the same turn. rollback_advance_consumed_by_user: bool = False - def advance_turn_if_needed(self) -> None: + def advance_turn_if_needed(self, from_audio_start: bool = False) -> None: """Advance turn if the assistant responded since the last user event. Called on audio_start(elevenlabs_user) and audit_log/user events. - After an interruption, hold_turn consumes one advance without incrementing. + After an interruption, hold_turn suppresses one advance from audit_log/user + (late STT from the interrupted session) but never blocks audio_start + (the user speaking again always starts a new turn). """ if self.hold_turn: - self.hold_turn = False - self.assistant_spoke_in_turn = False - return + if from_audio_start: + # New user speech — clear hold_turn but still advance + self.hold_turn = False + else: + # Late STT chunk from interrupted session — consume without advancing + self.hold_turn = False + self.assistant_spoke_in_turn = False + return if self.assistant_spoke_in_turn: self.turn_num += 1 self.assistant_spoke_in_turn = False @@ -327,7 +334,7 @@ def _handle_audio_start( state.assistant_spoke_in_turn = True state.pending_advance_after_rollback = False state.rollback_advance_consumed_by_user = False - state.advance_turn_if_needed() + state.advance_turn_if_needed(from_audio_start=True) # Mark the NEW turn (after advance) as a user-interrupted turn — the user's interrupting speech # lands here, symmetric with assistant_interrupted_turns. if state.pending_user_interrupts_label: diff --git a/tests/fixtures/processor_histories.json b/tests/fixtures/processor_histories.json index 10afc5cb..21133a90 100644 --- a/tests/fixtures/processor_histories.json +++ b/tests/fixtures/processor_histories.json @@ -980,5 +980,68 @@ "assistant_interrupted_turns": [], "user_interrupted_turns": [2] } + }, + { + "id": "assistant_interrupts_then_new_user_turn", + "description": "Based on record 5.1.3: Assistant audio_start overlaps with user audio by ~50ms (timing artifact, not a real barge-in). This triggers assistant_interrupted_turns and hold_turn. The user then speaks again in a NEW audio session. Tests that hold_turn does NOT prevent the new user audio_start from advancing the turn — user speaking again must always start a new turn.", + "history": [ + {"timestamp_ms": 1000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello! How can I help you today?"}}, + {"timestamp_ms": 1100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, + {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you today?"}}}, + {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.8}}, + + {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.0}}, + {"timestamp_ms": 2100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "My flight got canceled and I want a full refund."}}}, + {"timestamp_ms": 2200, "source": "audit_log", "event_type": "user", "data": "My flight got cancelled and I want a full refund."}, + {"timestamp_ms": 2350, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.35}}, + {"timestamp_ms": 2400, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.4}}, + {"timestamp_ms": 2500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'm sorry to hear that. Could you provide your confirmation number?"}}, + {"timestamp_ms": 2550, "source": "audit_log", "event_type": "assistant", "data": "I'm sorry to hear that. Could you provide your confirmation number?"}, + {"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.0}}, + {"timestamp_ms": 3100, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I'm sorry to hear that. Could you provide your confirmation number?"}}}, + + {"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}}, + {"timestamp_ms": 4100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Confirmation code is Z5OROH and last name is White."}}}, + {"timestamp_ms": 4200, "source": "audit_log", "event_type": "user", "data": "Confirmation code is Z5OROH."}, + {"timestamp_ms": 4300, "source": "audit_log", "event_type": "user", "data": "And last name is White."}, + {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.5}}, + {"timestamp_ms": 4600, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "Z5OROH", "last_name": "White"}}}, + {"timestamp_ms": 4700, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "success", "reservation": {"confirmation_number": "Z5OROH"}}}}, + {"timestamp_ms": 5000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}}, + {"timestamp_ms": 5050, "source": "audit_log", "event_type": "assistant", "data": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}, + {"timestamp_ms": 5100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.1}}, + {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.5}}, + {"timestamp_ms": 5600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}}}, + + {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.0}}, + {"timestamp_ms": 6100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yes, go ahead."}}}, + {"timestamp_ms": 6200, "source": "audit_log", "event_type": "user", "data": "Yes, go ahead."}, + {"timestamp_ms": 6500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.5}}, + {"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} + ], + "expected": { + "transcribed_assistant_turns": {"0": "Hello, how can I help you today?", "1": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "2": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}, + "transcribed_user_turns": {"1": "My flight got cancelled and I want a full refund.", "2": "Confirmation code is Z5OROH. And last name is White.", "3": "Yes, go ahead."}, + "intended_assistant_turns": {"0": "Hello! How can I help you today?", "1": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "2": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}, + "intended_user_turns": {"1": "My flight got canceled and I want a full refund.", "2": "Confirmation code is Z5OROH and last name is White.", "3": "Yes, go ahead."}, + "audio_timestamps_assistant_turns": {"0": [[1.0, 1.8]], "1": [[2.35, 3.0]], "2": [[5.1, 5.5]]}, + "audio_timestamps_user_turns": {"1": [[2.0, 2.4]], "2": [[4.0, 4.5]], "3": [[6.0, 6.5]]}, + "num_assistant_turns": 3, + "num_user_turns": 3, + "num_tool_calls": 1, + "tool_called": ["get_reservation"], + "conversation_trace": [ + {"role": "assistant", "content": "Hello! How can I help you today?", "type": "intended", "turn_id": 0}, + {"role": "user", "content": "My flight got cancelled and I want a full refund. [likely cut off by assistant]", "type": "transcribed", "turn_id": 1}, + {"role": "assistant", "content": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "type": "intended", "turn_id": 1}, + {"role": "user", "content": "Confirmation code is Z5OROH. And last name is White.", "type": "transcribed", "turn_id": 2}, + {"tool_name": "get_reservation", "parameters": {"confirmation_number": "Z5OROH", "last_name": "White"}, "type": "tool_call", "turn_id": 2}, + {"tool_name": "get_reservation", "tool_response": {"status": "success", "reservation": {"confirmation_number": "Z5OROH"}}, "type": "tool_response", "turn_id": 2}, + {"role": "assistant", "content": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?", "type": "intended", "turn_id": 2}, + {"role": "user", "content": "Yes, go ahead.", "type": "transcribed", "turn_id": 3} + ], + "assistant_interrupted_turns": [1], + "user_interrupted_turns": [] + } } ]