From f8851ed85ee63d4341daac18160d50d7d63ff99a Mon Sep 17 00:00:00 2001 From: Matthew Kennedy Date: Thu, 4 Apr 2024 17:39:43 +0000 Subject: [PATCH 1/3] Revert "Revert "only append audio if speech has started"" This reverts commit 992ca8f4584bc883e6aff961ac6c005658cf9480. --- openduck-py/openduck_py/response_agent.py | 27 ++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/openduck-py/openduck_py/response_agent.py b/openduck-py/openduck_py/response_agent.py index cf75d98..4b1e7fd 100644 --- a/openduck-py/openduck_py/response_agent.py +++ b/openduck-py/openduck_py/response_agent.py @@ -243,6 +243,7 @@ def __init__( self.response_task: Optional[asyncio.Task] = None self.interrupt_event = asyncio.Event() self.system_prompt = system_prompt + self.speech_has_started = False if context is None: context = {} @@ -325,10 +326,30 @@ async def receive_audio(self, message: bytes): vad_result = self.vad(audio_16k_chunk) if vad_result: async with SessionAsync() as db: - if "end" in vad_result: - print("end of speech detected.") + transcription = "" + if "start" in vad_result or "end" in vad_result: self.time_of_last_activity = time() - await log_event(db, self.session_id, "detected_end_of_speech") + if "start" in vad_result: + self.speech_has_started = True + print("Detected start of speech", flush=True) + # await log_event( + # db, + # self.session_id, + # "detected_start_of_speech", + # audio=audio_data, + # ) + else: + self.speech_has_started = False + print("Detected end of speech", flush=True) + # await log_event( + # db, + # self.session_id, + # "detected_end_of_speech", + # audio=audio_data, + # ) + + if self.speech_has_started: + self.audio_data.append(audio_16k_np) audio_data = np.concatenate(self.audio_data) transcription = await _transcribe(audio_data) From 4eb8c3fe140582add28770b74b6e834a69f362b8 Mon Sep 17 00:00:00 2001 From: Matthew Kennedy Date: Thu, 4 Apr 2024 17:40:13 +0000 Subject: [PATCH 2/3] delete log --- openduck-py/openduck_py/response_agent.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/openduck-py/openduck_py/response_agent.py b/openduck-py/openduck_py/response_agent.py index 4b1e7fd..9cce38f 100644 --- a/openduck-py/openduck_py/response_agent.py +++ b/openduck-py/openduck_py/response_agent.py @@ -332,21 +332,9 @@ async def receive_audio(self, message: bytes): if "start" in vad_result: self.speech_has_started = True print("Detected start of speech", flush=True) - # await log_event( - # db, - # self.session_id, - # "detected_start_of_speech", - # audio=audio_data, - # ) else: self.speech_has_started = False print("Detected end of speech", flush=True) - # await log_event( - # db, - # self.session_id, - # "detected_end_of_speech", - # audio=audio_data, - # ) if self.speech_has_started: self.audio_data.append(audio_16k_np) From 0fe03511970be57e505a26bcdcd96d31da8918d2 Mon Sep 17 00:00:00 2001 From: Matthew Kennedy Date: Thu, 4 Apr 2024 17:49:15 +0000 Subject: [PATCH 3/3] fix --- openduck-py/openduck_py/response_agent.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/openduck-py/openduck_py/response_agent.py b/openduck-py/openduck_py/response_agent.py index 9cce38f..321fe5c 100644 --- a/openduck-py/openduck_py/response_agent.py +++ b/openduck-py/openduck_py/response_agent.py @@ -353,17 +353,16 @@ async def receive_audio(self, message: bytes): await log_event(db, self.session_id, "interrupted_response") await self.interrupt(self.response_task) - await log_event( - db, self.session_id, "started_response", audio=audio_data - ) - self.response_task = asyncio.create_task( - self.start_response(transcription) - ) - - if "start" in vad_result: - print("start of speech detected.") - self.time_of_last_activity = time() - await log_event(db, self.session_id, "detected_start_of_speech") + if "end" in vad_result: + await log_event( + db, + self.session_id, + "started_response", + audio=audio_data, + ) + self.response_task = asyncio.create_task( + self.start_response(transcription) + ) i = upper async def _generate_and_speak(