Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions src/eva/metrics/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,16 +90,23 @@ class _TurnExtractionState:
# user_speech lands at the same turn.
rollback_advance_consumed_by_user: bool = False

def advance_turn_if_needed(self) -> None:
def advance_turn_if_needed(self, from_audio_start: bool = False) -> None:
"""Advance turn if the assistant responded since the last user event.

Called on audio_start(elevenlabs_user) and audit_log/user events.
After an interruption, hold_turn consumes one advance without incrementing.
After an interruption, hold_turn suppresses one advance from audit_log/user
(late STT from the interrupted session) but never blocks audio_start
(the user speaking again always starts a new turn).
"""
if self.hold_turn:
self.hold_turn = False
self.assistant_spoke_in_turn = False
return
if from_audio_start:
# New user speech — clear hold_turn but still advance
self.hold_turn = False
else:
# Late STT chunk from interrupted session — consume without advancing
self.hold_turn = False
self.assistant_spoke_in_turn = False
return
if self.assistant_spoke_in_turn:
self.turn_num += 1
self.assistant_spoke_in_turn = False
Expand Down Expand Up @@ -327,7 +334,7 @@ def _handle_audio_start(
state.assistant_spoke_in_turn = True
state.pending_advance_after_rollback = False
state.rollback_advance_consumed_by_user = False
state.advance_turn_if_needed()
state.advance_turn_if_needed(from_audio_start=True)
# Mark the NEW turn (after advance) as a user-interrupted turn — the user's interrupting speech
# lands here, symmetric with assistant_interrupted_turns.
if state.pending_user_interrupts_label:
Expand Down
63 changes: 63 additions & 0 deletions tests/fixtures/processor_histories.json
Original file line number Diff line number Diff line change
Expand Up @@ -980,5 +980,68 @@
"assistant_interrupted_turns": [],
"user_interrupted_turns": [2]
}
},
{
"id": "assistant_interrupts_then_new_user_turn",
"description": "Based on record 5.1.3: Assistant audio_start overlaps with user audio by ~50ms (timing artifact, not a real barge-in). This triggers assistant_interrupted_turns and hold_turn. The user then speaks again in a NEW audio session. Tests that hold_turn does NOT prevent the new user audio_start from advancing the turn — user speaking again must always start a new turn.",
"history": [
{"timestamp_ms": 1000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello! How can I help you today?"}},
{"timestamp_ms": 1100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}},
{"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you today?"}}},
{"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.8}},

{"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.0}},
{"timestamp_ms": 2100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "My flight got canceled and I want a full refund."}}},
{"timestamp_ms": 2200, "source": "audit_log", "event_type": "user", "data": "My flight got cancelled and I want a full refund."},
{"timestamp_ms": 2350, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.35}},
{"timestamp_ms": 2400, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.4}},
{"timestamp_ms": 2500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'm sorry to hear that. Could you provide your confirmation number?"}},
{"timestamp_ms": 2550, "source": "audit_log", "event_type": "assistant", "data": "I'm sorry to hear that. Could you provide your confirmation number?"},
{"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.0}},
{"timestamp_ms": 3100, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I'm sorry to hear that. Could you provide your confirmation number?"}}},

{"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}},
{"timestamp_ms": 4100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Confirmation code is Z5OROH and last name is White."}}},
{"timestamp_ms": 4200, "source": "audit_log", "event_type": "user", "data": "Confirmation code is Z5OROH."},
{"timestamp_ms": 4300, "source": "audit_log", "event_type": "user", "data": "And last name is White."},
{"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.5}},
{"timestamp_ms": 4600, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "Z5OROH", "last_name": "White"}}},
{"timestamp_ms": 4700, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "success", "reservation": {"confirmation_number": "Z5OROH"}}}},
{"timestamp_ms": 5000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}},
{"timestamp_ms": 5050, "source": "audit_log", "event_type": "assistant", "data": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"},
{"timestamp_ms": 5100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.1}},
{"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.5}},
{"timestamp_ms": 5600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"}}},

{"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.0}},
{"timestamp_ms": 6100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yes, go ahead."}}},
{"timestamp_ms": 6200, "source": "audit_log", "event_type": "user", "data": "Yes, go ahead."},
{"timestamp_ms": 6500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.5}},
{"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}}
],
"expected": {
"transcribed_assistant_turns": {"0": "Hello, how can I help you today?", "1": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "2": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"},
"transcribed_user_turns": {"1": "My flight got cancelled and I want a full refund.", "2": "Confirmation code is Z5OROH. And last name is White.", "3": "Yes, go ahead."},
"intended_assistant_turns": {"0": "Hello! How can I help you today?", "1": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "2": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?"},
"intended_user_turns": {"1": "My flight got canceled and I want a full refund.", "2": "Confirmation code is Z5OROH and last name is White.", "3": "Yes, go ahead."},
"audio_timestamps_assistant_turns": {"0": [[1.0, 1.8]], "1": [[2.35, 3.0]], "2": [[5.1, 5.5]]},
"audio_timestamps_user_turns": {"1": [[2.0, 2.4]], "2": [[4.0, 4.5]], "3": [[6.0, 6.5]]},
"num_assistant_turns": 3,
"num_user_turns": 3,
"num_tool_calls": 1,
"tool_called": ["get_reservation"],
"conversation_trace": [
{"role": "assistant", "content": "Hello! How can I help you today?", "type": "intended", "turn_id": 0},
{"role": "user", "content": "My flight got cancelled and I want a full refund. [likely cut off by assistant]", "type": "transcribed", "turn_id": 1},
{"role": "assistant", "content": "[assistant interrupts] I'm sorry to hear that. Could you provide your confirmation number?", "type": "intended", "turn_id": 1},
{"role": "user", "content": "Confirmation code is Z5OROH. And last name is White.", "type": "transcribed", "turn_id": 2},
{"tool_name": "get_reservation", "parameters": {"confirmation_number": "Z5OROH", "last_name": "White"}, "type": "tool_call", "turn_id": 2},
{"tool_name": "get_reservation", "tool_response": {"status": "success", "reservation": {"confirmation_number": "Z5OROH"}}, "type": "tool_response", "turn_id": 2},
{"role": "assistant", "content": "I found your reservation. Your flight was canceled, so you are eligible for a full refund. Shall I proceed?", "type": "intended", "turn_id": 2},
{"role": "user", "content": "Yes, go ahead.", "type": "transcribed", "turn_id": 3}
],
"assistant_interrupted_turns": [1],
"user_interrupted_turns": []
}
}
]
Loading