Update Python LLM assistant to use picoLLM v1.1 (#20)
laves authored Oct 2, 2024
1 parent afa108d commit b87a287
Showing 2 changed files with 110 additions and 64 deletions.
172 changes: 109 additions & 63 deletions recipes/llm-voice-assistant/python/main.py
@@ -1,4 +1,5 @@
 import signal
+import concurrent.futures
 import time
 from argparse import ArgumentParser
 from collections import deque
@@ -69,6 +70,7 @@ def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec
     synthesize = False
     flush = False
     close = False
+    interrupt = False
     utterance_end_sec = 0.
     delay_sec = [-1.]

@@ -128,6 +130,16 @@ def play_buffered_pcm() -> None:
             connection.send({'done': True})
         elif close:
             break
+        elif interrupt:
+            orca_profiler.tick()
+            pcm = orca_stream.flush()
+            orca_profiler.tock(pcm)
+            connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]})
+            interrupt = False
+            pcm_deque.clear()
+            speaker.stop()
+            delay_sec[0] = -1
+            connection.send({'done': True})
         else:
             time.sleep(stream_frame_sec)

@@ -145,6 +157,8 @@ def play_buffered_pcm() -> None:
                 flush = True
             elif message['command'] == 'close':
                 close = True
+            elif message['command'] == 'interrupt':
+                interrupt = True

     speaker.delete()
     orca_stream.close()
@@ -238,24 +252,24 @@ def main() -> None:
         porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
     else:
         porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path])
-    print(f"→ Porcupine V{porcupine.version}")
+    print(f"→ Porcupine v{porcupine.version}")

     cheetah = pvcheetah.create(
         access_key=access_key,
         endpoint_duration_sec=cheetah_endpoint_duration_sec,
         enable_automatic_punctuation=True)
-    print(f"→ Cheetah V{cheetah.version}")
+    print(f"→ Cheetah v{cheetah.version}")

     pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device)
     dialog = pllm.get_dialog()
-    print(f"→ picoLLM V{pllm.version} <{pllm.model}>")
+    print(f"→ picoLLM v{pllm.version} <{pllm.model}>")

     main_connection, orca_process_connection = Pipe()
     orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec))
     orca_process.start()
     while not main_connection.poll():
         time.sleep(0.01)
-    print(f"→ Orca V{main_connection.recv()['version']}")
+    print(f"→ Orca v{main_connection.recv()['version']}")

     mic = PvRecorder(frame_length=porcupine.frame_length)
     mic.start()
@@ -269,10 +283,71 @@ def handler(_, __) -> None:

     signal.signal(signal.SIGINT, handler)

+    def llm_task(dialog, user_request, utterance_end_sec, main_connection):
+        short_answers_instruction = \
+            "You are a voice assistant and your answers are very short but informative"
+        dialog.add_human_request(
+            f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
+
+        picollm_profiler = TPSProfiler()
+
+        stop_phrases = {
+            '</s>', # Llama-2, Mistral, and Mixtral
+            '<end_of_turn>', # Gemma
+            '<|endoftext|>', # Phi-2
+            '<|eot_id|>', # Llama-3
+            '<|end|>', '<|user|>', '<|assistant|>', # Phi-3
+        }
+
+        completion = ['']
+
+        def llm_callback(text: str) -> None:
+            picollm_profiler.tock()
+            completion[0] += text
+            if not any(x in completion[0] for x in stop_phrases):
+                main_connection.send({
+                    'command': 'synthesize',
+                    'text': text.replace('\n', ' . '),
+                    'utterance_end_sec': utterance_end_sec})
+                print(text, end='', flush=True)
+
+        print(f"\nLLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True)
+        res = pllm.generate(
+            prompt=dialog.prompt(),
+            completion_token_limit=picollm_completion_token_limit,
+            stop_phrases=stop_phrases,
+            presence_penalty=picollm_presence_penalty,
+            frequency_penalty=picollm_frequency_penalty,
+            temperature=picollm_temperature,
+            top_p=picollm_top_p,
+            stream_callback=llm_callback)
+
+        if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+            main_connection.send({'command': 'interrupt'})
+        else:
+            main_connection.send({'command': 'flush'})
+
+        print('\n')
+        dialog.add_llm_response(res.completion)
+
+        if profile:
+            print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
+
+        while not main_connection.poll():
+            time.sleep(0.01)
+        message = main_connection.recv()
+        if profile:
+            print(f"[Orca RTF: {message['rtf']:.2f}]")
+            print(f"[Delay: {message['delay']:.2f} sec]")
+        while not main_connection.poll():
+            time.sleep(0.01)
+        assert main_connection.recv()['done']
+
+        return res
+
     wake_word_detected = False
     user_request = ''
     endpoint_reached = False
+    utterance_end_sec = 0

     porcupine_profiler = RTFProfiler(porcupine.sample_rate)
     cheetah_profiler = RTFProfiler(cheetah.sample_rate)
@@ -304,66 +379,37 @@ def handler(_, __) -> None:
                     remaining_transcript = cheetah.flush()
                     cheetah_profiler.tock()
                     user_request += remaining_transcript
-                    print(remaining_transcript, end='\n\n')
+                    print(remaining_transcript, end='\n')
                     if profile:
                         print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
             else:
-                short_answers_instruction = \
-                    "You are a voice assistant and your answers are very short but informative"
-                dialog.add_human_request(
-                    f"{short_answers_instruction}. {user_request}" if short_answers else user_request)
-
-                picollm_profiler = TPSProfiler()
-
-                stop_phrases = {
-                    '</s>', # Llama-2, Mistral, and Mixtral
-                    '<end_of_turn>', # Gemma
-                    '<|endoftext|>', # Phi-2
-                    '<|eot_id|>', # Llama-3
-                }
-
-                completion = ['']
-
-                def llm_callback(text: str) -> None:
-                    picollm_profiler.tock()
-                    completion[0] += text
-                    if not any(x in completion[0] for x in stop_phrases):
-                        main_connection.send({
-                            'command': 'synthesize',
-                            'text': text.replace('\n', ' . '),
-                            'utterance_end_sec': utterance_end_sec})
-                        print(text, end='', flush=True)
-
-                print("\nLLM > ", end='', flush=True)
-                res = pllm.generate(
-                    prompt=dialog.prompt(),
-                    completion_token_limit=picollm_completion_token_limit,
-                    stop_phrases=stop_phrases,
-                    presence_penalty=picollm_presence_penalty,
-                    frequency_penalty=picollm_frequency_penalty,
-                    temperature=picollm_temperature,
-                    top_p=picollm_top_p,
-                    stream_callback=llm_callback)
-                main_connection.send({'command': 'flush'})
-                print('\n')
-                dialog.add_llm_response(res.completion)
-                if profile:
-                    print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]")
-
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                message = main_connection.recv()
-                if profile:
-                    print(f"[Orca RTF: {message['rtf']:.2f}]")
-                    print(f"[Delay: {message['delay']:.2f} sec]")
-                while not main_connection.poll():
-                    time.sleep(0.01)
-                assert main_connection.recv()['done']
-
-                wake_word_detected = False
-                user_request = ''
-                endpoint_reached = False
-                print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+                with concurrent.futures.ThreadPoolExecutor() as executor:
+                    llm_future = executor.submit(
+                        llm_task,
+                        dialog,
+                        user_request,
+                        utterance_end_sec,
+                        main_connection)
+
+                    while not llm_future.done():
+                        pcm = mic.read()
+                        porcupine_profiler.tick()
+                        wake_word_detected = porcupine.process(pcm) == 0
+                        porcupine_profiler.tock(pcm)
+                        if wake_word_detected:
+                            pllm.interrupt()
+                            break
+
+                    llm_result = llm_future.result()
+                    if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+                        wake_word_detected = True
+                        print("$ Wake word detected, utter your request or question ...\n")
+                        print("User > ", end='', flush=True)
+                    else:
+                        wake_word_detected = False
+                        print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...")
+                    user_request = ''
+                    endpoint_reached = False

     finally:
         main_connection.send({'command': 'close'})
         mic.delete()
2 changes: 1 addition & 1 deletion recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,4 @@
-picollm==1.0.0
+picollm==1.1.0
 pvcheetah==2.0.1
 pvorca==1.0.0
 pvporcupine==3.0.2
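
What the change amounts to: picoLLM v1.1 adds interrupt support, so the recipe now runs pllm.generate() on a ThreadPoolExecutor worker while the main thread keeps feeding microphone frames to Porcupine. A wake-word hit calls pllm.interrupt(), generate() returns with the INTERRUPTED endpoint, and the Orca worker's new 'interrupt' command flushes the stream, clears buffered PCM, and stops the speaker. Below is a minimal standalone sketch of the generate/interrupt pattern; the AccessKey, model path, prompt, and token limit are placeholders rather than values from the recipe, and pllm.release() for cleanup is assumed from the picoLLM Python SDK.

import concurrent.futures
import time

import picollm

# Placeholders: substitute your own AccessKey and a downloaded .pllm model file.
pllm = picollm.create(access_key='${ACCESS_KEY}', model_path='model.pllm')

try:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Run generation on a worker thread so the caller stays free to
        # watch for a stop signal (the recipe reads microphone frames here).
        future = executor.submit(
            pllm.generate,
            prompt='What is a voice assistant?',
            completion_token_limit=256,
            stream_callback=lambda text: print(text, end='', flush=True))

        # Stand-in for wake-word detection: request an early stop mid-generation.
        time.sleep(0.5)
        pllm.interrupt()

        res = future.result()
        if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
            print('\n[response interrupted]')
finally:
    pllm.release()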
