Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/agents/voice/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ async def _add_text(self, text: str):

combined_sentences, self._text_buffer = self.tts_settings.text_splitter(self._text_buffer)

if len(combined_sentences) >= 20:
if combined_sentences:
local_queue: asyncio.Queue[VoiceStreamEvent | None] = asyncio.Queue()
self._ordered_tasks.append(local_queue)
self._tasks.append(
Expand All @@ -220,6 +220,10 @@ async def _turn_done(self):
)
)
self._text_buffer = ""
elif self._started_processing_turn:
local_queue = asyncio.Queue()
self._ordered_tasks.append(local_queue)
await local_queue.put(VoiceStreamEventLifecycle(event="turn_ended"))
self._done_processing = True
if self._dispatcher_task is None:
self._dispatcher_task = asyncio.create_task(self._dispatch_audio())
Expand Down
58 changes: 58 additions & 0 deletions tests/voice/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,64 @@ async def run(self, text: str, settings: TTSModelSettings):
assert audio_chunks == [np.array([1], dtype=np.int16).tobytes()]


@pytest.mark.asyncio
async def test_streamed_audio_result_synthesizes_short_custom_splitter_chunk() -> None:
texts: list[str] = []

class RecordingTTS(FakeTTS):
async def run(self, text: str, settings: TTSModelSettings):
texts.append(text)
yield np.zeros(2, dtype=np.int16).tobytes()

def split_immediately(text: str) -> tuple[str, str]:
return text, ""

result = StreamedAudioResult(
RecordingTTS(),
TTSModelSettings(buffer_size=1, text_splitter=split_immediately),
VoicePipelineConfig(),
)

await result._add_text("ok")
await result._turn_done()
await result._done()

events, audio_chunks = await extract_events(result)

assert texts == ["ok"]
assert events == ["turn_started", "audio", "turn_ended", "session_ended"]
assert audio_chunks == [np.zeros(2, dtype=np.int16).tobytes()]


@pytest.mark.asyncio
async def test_streamed_audio_result_ignores_empty_custom_splitter_chunk() -> None:
texts: list[str] = []

class RecordingTTS(FakeTTS):
async def run(self, text: str, settings: TTSModelSettings):
texts.append(text)
yield np.zeros(2, dtype=np.int16).tobytes()

def discard_text(_text: str) -> tuple[str, str]:
return "", ""

result = StreamedAudioResult(
RecordingTTS(),
TTSModelSettings(buffer_size=1, text_splitter=discard_text),
VoicePipelineConfig(),
)

await result._add_text("ok")
await result._turn_done()
await result._done()

events, audio_chunks = await extract_events(result)

assert texts == []
assert events == ["turn_started", "turn_ended", "session_ended"]
assert audio_chunks == []


@pytest.mark.asyncio
async def test_voicepipeline_run_single_turn() -> None:
# Single turn. Should produce a single audio output, which is the TTS output for "out_1".
Expand Down
Loading