
Commit

Merge pull request #101 from fractalego/interrupt-speech
listening to what is being said while speaking
fractalego committed Jul 12, 2024
2 parents e05d8e2 + 6061f19 commit b6180d4
Showing 3 changed files with 31 additions and 5 deletions.
6 changes: 5 additions & 1 deletion wafl/answerer/answerer_implementation.py
@@ -121,7 +121,11 @@ def get_text_from_facts_and_thresholds(
         if item[0].text not in memory:
             text = item[0].text
             if item[0].metadata:
-                text = f"Metadata for the following text: {str(item[0].metadata)}" + "\n" + text
+                text = (
+                    f"Metadata for the following text: {str(item[0].metadata)}"
+                    + "\n"
+                    + text
+                )
             text_list.append(text)
 
     return text_list
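This hunk only re-wraps the string construction for readability; the value produced is unchanged. For illustration, a minimal sketch of the metadata prefixing, using hypothetical fact text and metadata (the sample values below are not from the repository):

metadata = {"source": "greetings.yaml"}      # hypothetical metadata dictionary
text = "The bot greets the user by name."    # hypothetical fact text

if metadata:
    # Same construction as in the diff: a one-line metadata header, a newline,
    # then the original fact text.
    text = (
        f"Metadata for the following text: {str(metadata)}"
        + "\n"
        + text
    )

print(text)
# Metadata for the following text: {'source': 'greetings.yaml'}
# The bot greets the user by name.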
6 changes: 5 additions & 1 deletion wafl/connectors/clients/llm_chat_client.py
@@ -27,4 +27,8 @@ async def _get_answer_prompt(
         )
 
     def _get_system_prompt(self, text, rules_text):
-        return self.prompt.replace("{facts}", text.strip()).replace("{rules}", rules_text.strip()).strip()
+        return (
+            self.prompt.replace("{facts}", text.strip())
+            .replace("{rules}", rules_text.strip())
+            .strip()
+        )
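As in the previous file, the behaviour is unchanged: the system prompt is still built by substituting the {facts} and {rules} placeholders in the client's prompt template and trimming the result. A standalone sketch, assuming a hypothetical template string (the client's real prompt text is loaded elsewhere in wafl):

# Hypothetical template; the actual prompt is loaded by the chat client itself.
prompt = "These are the facts:\n{facts}\n\nThese are the rules:\n{rules}\n"

def get_system_prompt(text, rules_text):
    # Same call chain as the diff: fill both placeholders, then strip the result.
    return (
        prompt.replace("{facts}", text.strip())
        .replace("{rules}", rules_text.strip())
        .strip()
    )

print(get_system_prompt("The user is called Alice. ", "Always answer briefly.\n"))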
24 changes: 21 additions & 3 deletions wafl/speaker/fairseq_speaker.py
@@ -1,4 +1,6 @@
 import asyncio
 
+import numpy as np
 import pyaudio
+
 from wafl.connectors.factories.speaker_connector_factory import SpeakerConnectorFactory
@@ -10,8 +12,13 @@ class FairSeqSpeaker(BaseSpeaker):
     def __init__(self, config):
         self._connector = SpeakerConnectorFactory.get_connector(config)
         self._p = pyaudio.PyAudio()
+        self._input_chunk_size = 1024
+        self._output_chunk_size = 4096
+        self._volume_threshold = (
+            config.get_value("listener_model")["listener_volume_threshold"] / 5e3
+        )
 
-    async def speak(self, text): #### This is the function that is called in the VoiceInterface class
+    async def speak(self, text):
         text = convert_numbers_to_words(text)
         prediction = await self._connector.predict(text)
         wav = prediction["wav"]
@@ -20,10 +27,21 @@ async def speak(self, text): #### This is the function that is called in the VoiceInterface class
             format=pyaudio.paFloat32,
             channels=1,
             rate=rate,
+            input=True,
             output=True,
         )
-        await asyncio.sleep(0.2)
-        stream.write(wav)
+        stream.start_stream()
+        await asyncio.sleep(0.1)
+        for i in range(0, len(wav), self._output_chunk_size):
+            inp = stream.read(self._input_chunk_size)
+            if _rms(inp) > self._volume_threshold:
+                break
+            stream.write(wav[i : i + self._output_chunk_size])
         stream.stop_stream()
         stream.close()
         await asyncio.sleep(0.1)
+
+
+def _rms(frame):
+    data = np.frombuffer(frame, dtype=np.float32)
+    return np.std(data) / len(data)
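This file carries the feature named in the commit message: the playback stream is now opened for input as well as output, and between output chunks the speaker reads a short block from the microphone, scores it with _rms, and breaks out of the loop when the score exceeds the threshold derived from listener_volume_threshold / 5e3. A condensed, self-contained sketch of the same pattern follows; the sample rate, chunk sizes, and threshold value are illustrative, not values taken from wafl's configuration.

import asyncio

import numpy as np
import pyaudio

INPUT_CHUNK = 1024    # frames read back from the microphone per iteration
OUTPUT_CHUNK = 4096   # slice of the synthesized buffer written per iteration
THRESHOLD = 2e-4      # illustrative value; wafl derives its own from the listener config


def _level(frame):
    # Same statistic as the diff's _rms helper: standard deviation of the
    # float32 samples divided by the sample count.
    data = np.frombuffer(frame, dtype=np.float32)
    return np.std(data) / len(data)


async def play_interruptible(wav, rate=22050):
    # Play float32 PCM bytes, stopping early if the microphone picks up speech.
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paFloat32,
        channels=1,
        rate=rate,
        input=True,    # the same stream is also used to monitor the microphone
        output=True,
    )
    stream.start_stream()
    await asyncio.sleep(0.1)
    for i in range(0, len(wav), OUTPUT_CHUNK):
        inp = stream.read(INPUT_CHUNK)
        if _level(inp) > THRESHOLD:
            break  # the user started talking, so stop speaking
        stream.write(wav[i : i + OUTPUT_CHUNK])
    stream.stop_stream()
    stream.close()
    p.terminate()

Something like asyncio.run(play_interruptible(wav_bytes)) would then speak until the level check trips; in the class above the same loop uses the instance's chunk sizes and the threshold taken from the listener_model configuration.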
