From 3c00157d8bcda1b3bd92a49888f453509c7b0c0d Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 29 Nov 2024 09:30:42 -0800 Subject: [PATCH 1/9] changed interrupt to work after picollm has finished generating --- recipes/llm-voice-assistant/python/main.py | 644 ++++++++++++--------- 1 file changed, 378 insertions(+), 266 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 054669a..ccb1df5 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -3,15 +3,8 @@ import time from argparse import ArgumentParser from collections import deque -from itertools import chain -from multiprocessing import ( - Pipe, - Process, -) -from typing import ( - Optional, - Sequence, -) +from multiprocessing import Process, Queue +from typing import Optional, Sequence import picollm import pvcheetah @@ -21,6 +14,18 @@ from pvspeaker import PvSpeaker +class Commands: + CLOSE = 'close' + START = 'start' + INTERRUPT = 'interrupt' + TEXT = 'text' + GENERATE = 'generate' + SYNTHESIZE_START = 'synthesize-start' + SYNTHESIZE = 'synthesize' + SYNTHESIZE_FLUSH = 'synthesize-flush' + PROFILE = 'profile' + + class RTFProfiler: def __init__(self, sample_rate: int) -> None: self._sample_rate = sample_rate @@ -67,6 +72,11 @@ def __init__(self, stop_phrases: list) -> None: self.text: str = '' self.new_tokens: str = '' + def reset(self): + self.start: int = 0 + self.text: str = '' + self.new_tokens: str = '' + def append(self, text: str) -> None: self.text += text end = len(self.text) @@ -91,109 +101,276 @@ def get_new_tokens(self) -> str: return self.new_tokens -def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec: int = 0.03) -> None: - orca = pvorca.create(access_key=access_key) - orca_stream = orca.stream_open() +def listen_worker(main_queue, listen_queue, access_key, keyword_model_path, cheetah_endpoint_duration_sec): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) - texts = list() - pcm_deque = deque() - warmup = [False] - synthesize = False - flush = False - close = False - interrupt = False - utterance_end_sec = 0. - delay_sec = [-1.] 
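+    # Every worker installs the same SIGINT handler: instead of dying on Ctrl+C
+    # it posts a CLOSE command to the shared main queue, so the main loop can
+    # shut all processes down in one place.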
+ signal.signal(signal.SIGINT, handler) - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + if keyword_model_path is None: + porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) + else: + porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) + porcupine_profiler = RTFProfiler(porcupine.sample_rate) - connection.send({'version': orca.version}) + main_queue.put({'command': 'init', 'name': 'Porcupine', 'version': porcupine.version}) - orca_profiler = RTFProfiler(orca.sample_rate) + cheetah = pvcheetah.create( + access_key=access_key, + endpoint_duration_sec=cheetah_endpoint_duration_sec, + enable_automatic_punctuation=True) + cheetah_profiler = RTFProfiler(cheetah.sample_rate) - def buffer_pcm(pcm_chunk: Optional[Sequence[int]]) -> None: - if pcm_chunk is not None: - if delay_sec[0] == -1: - delay_sec[0] = time.perf_counter() - utterance_end_sec + main_queue.put({'command': 'init', 'name': 'Cheetah', 'version': cheetah.version}) - pcm_deque.append(pcm_chunk) + mic = PvRecorder(frame_length=porcupine.frame_length) + mic.start() + + main_queue.put({'command': 'init', 'name': 'PvRecorder', 'version': mic.version}) + + while listen_queue.empty(): + time.sleep(0.01) + listen_queue.get() - def play_buffered_pcm() -> None: - if warmup[0]: - if len(list(chain.from_iterable(pcm_deque))) < int(warmup_sec * orca.sample_rate): - return + try: + close = False + listening = False + user_request = '' + while not close: + if listen_queue.empty(): + time.sleep(0.01) + + while not listen_queue.empty(): + message = listen_queue.get() + if message['command'] == Commands.CLOSE: + close = True + + pcm = mic.read() + if not listening: + porcupine_profiler.tick() + wake_word_detected = porcupine.process(pcm) == 0 + porcupine_profiler.tock(pcm) + if wake_word_detected: + listening = True + main_queue.put({'command': Commands.PROFILE, 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) + main_queue.put({'command': Commands.INTERRUPT}) else: - warmup[0] = False - - if len(pcm_deque) > 0: - pcm_chunk = list(chain.from_iterable(pcm_deque)) - pcm_deque.clear() - - written = speaker.write(pcm_chunk) - if written < len(pcm_chunk): - pcm_deque.appendleft(pcm_chunk[written:]) - - while True: - if synthesize and len(texts) > 0: - orca_profiler.tick() - pcm = orca_stream.synthesize(texts.pop(0)) - orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - elif flush: - while len(texts) > 0: + cheetah_profiler.tick() + partial_transcript, endpoint_reached = cheetah.process(pcm) + cheetah_profiler.tock(pcm) + user_request += partial_transcript + main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) + if endpoint_reached: + utterance_end_sec = time.perf_counter() + cheetah_profiler.tick() + remaining_transcript = cheetah.flush() + cheetah_profiler.tock(pcm) + user_request += remaining_transcript + main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) + main_queue.put({'command': Commands.GENERATE, 'text': user_request, 'utterance_end_sec': utterance_end_sec}) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) + user_request = '' + listening = False + finally: + porcupine.delete() + cheetah.delete() + mic.delete() + + +def generate_worker(main_queue, generate_queue, access_key, picollm_model_path, picollm_device, picollm_completion_token_limit, picollm_presence_penalty, picollm_frequency_penalty, picollm_temperature, 
picollm_top_p, short_answers): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) + + signal.signal(signal.SIGINT, handler) + + pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) + pllm_profiler = TPSProfiler() + dialog = pllm.get_dialog() + generating = False + + main_queue.put({'command': 'init', 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) + + stop_phrases = { + '', # Llama-2, Mistral, and Mixtral + '', # Gemma + '<|endoftext|>', # Phi-2 + '<|eot_id|>', # Llama-3 + '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 + } + + completion = CompletionText(stop_phrases) + + def llm_callback(text: str): + pllm_profiler.tock() + completion.append(text) + new_tokens = completion.get_new_tokens() + if len(new_tokens) > 0 and generating: + main_queue.put({'command': Commands.SYNTHESIZE, 'text': new_tokens}) + + def llm_task(user_request, utterance_end_sec): + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" + dialog.add_human_request( + f"{short_answers_instruction}. {user_request}" if short_answers else user_request) + + main_queue.put({'command': Commands.SYNTHESIZE_START, 'utterance_end_sec': utterance_end_sec}) + + res = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=picollm_completion_token_limit, + stop_phrases=stop_phrases, + presence_penalty=picollm_presence_penalty, + frequency_penalty=picollm_frequency_penalty, + temperature=picollm_temperature, + top_p=picollm_top_p, + stream_callback=llm_callback) + + dialog.add_llm_response(res.completion) + + if res.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: + main_queue.put({'command': Commands.SYNTHESIZE_FLUSH}) + + main_queue.put({'command': Commands.PROFILE, 'text': f"[picoLLM TPS: {pllm_profiler.tps():.2f}]"}) + + return res + + executor = concurrent.futures.ThreadPoolExecutor() + + while generate_queue.empty(): + time.sleep(0.01) + generate_queue.get() + + try: + close = False + llm_future = None + while not close: + if generate_queue.empty(): + time.sleep(0.01) + + while not generate_queue.empty(): + message = generate_queue.get() + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == Commands.GENERATE: + generating = True + completion.reset() + llm_future = executor.submit( + llm_task, + message['text'], + message['utterance_end_sec']) + elif message['command'] == Commands.INTERRUPT and generating: + generating = False + pllm.interrupt() + + if llm_future and llm_future.done(): + llm_future = None + generating = False + finally: + while llm_future and not llm_future.done(): + time.sleep(0.01) + + executor.shutdown(True) + pllm.release() + + +def speak_worker(main_queue, speak_queue, access_key, warmup_sec): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) + + signal.signal(signal.SIGINT, handler) + + orca = pvorca.create(access_key=access_key) + orca_stream = orca.stream_open() + orca_profiler = RTFProfiler(orca.sample_rate) + warmup_size = int(warmup_sec * orca.sample_rate) + + main_queue.put({'command': 'init', 'name': 'Orca', 'version': orca.version}) + + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + + main_queue.put({'command': 'init', 'name': 'PvSpeaker', 'version': speaker.version}) + + while speak_queue.empty(): + time.sleep(0.01) + speak_queue.get() + + try: + close = False + synthesizing = False + speaking = False + flush = False + text_queue = deque() + 
pcm_queue = list() + delay_sec = -1 + utterance_end_sec = 0 + while not close: + if speak_queue.empty(): + time.sleep(0.01) + + while not speak_queue.empty(): + message = speak_queue.get() + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == Commands.SYNTHESIZE_START: + synthesizing = True + utterance_end_sec = message['utterance_end_sec'] + delay_sec = -1 + elif message['command'] == Commands.SYNTHESIZE: + text_queue.append(message['text'].replace('\n', ' . ')) + elif message['command'] == Commands.INTERRUPT: + if synthesizing: + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + if speaking: + speaker.stop() + text_queue.clear() + pcm_queue.clear() + synthesizing = False + speaking = False + flush = False + elif message['command'] == Commands.SYNTHESIZE_FLUSH: + flush = True + + while len(text_queue) > 0: + text = text_queue.popleft() orca_profiler.tick() - pcm = orca_stream.synthesize(texts.pop(0)) + pcm = orca_stream.synthesize(text) orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) - flush = False - speaker.flush(list(chain.from_iterable(pcm_deque))) - pcm_deque.clear() - speaker.stop() - delay_sec[0] = -1 - connection.send({'done': True}) - elif close: - break - elif interrupt: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) - interrupt = False - pcm_deque.clear() - speaker.stop() - delay_sec[0] = -1 - connection.send({'done': True}) - else: - time.sleep(stream_frame_sec) - - while connection.poll(): - message = connection.recv() - if message['command'] == 'synthesize': - texts.append(message['text']) - if not speaker.is_started: - speaker.start() - warmup[0] = True - utterance_end_sec = message['utterance_end_sec'] - synthesize = True - elif message['command'] == 'flush': - synthesize = False - flush = True - elif message['command'] == 'close': - close = True - elif message['command'] == 'interrupt': - interrupt = True + if pcm is not None: + if delay_sec == -1: + delay_sec = time.perf_counter() - utterance_end_sec + pcm_queue.extend(pcm) - speaker.delete() - orca_stream.close() - orca.delete() + if flush and synthesizing: + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + synthesizing = False + if pcm is not None: + pcm_queue.extend(pcm) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + + if not speaking and len(pcm_queue) > warmup_size: + speaker.start() + speaking = True + + if speaking and len(pcm_queue) > 0: + written = speaker.write(pcm_queue) + if written > 0: + del pcm_queue[:written] + + if speaking and flush and len(pcm_queue) == 0: + speaker.flush(pcm_queue) + speaker.stop() + speaking = False + flush = False + main_queue.put({'command': Commands.START}) + finally: + orca_stream.close() + orca.delete() + speaker.delete() def main() -> None: @@ -279,180 +456,115 @@ def main() -> None: profile = args.profile short_answers = args.short_answers - if keyword_model_path is None: - porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) - else: - porcupine = 
pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - print(f"→ Porcupine v{porcupine.version}") - - cheetah = pvcheetah.create( - access_key=access_key, - endpoint_duration_sec=cheetah_endpoint_duration_sec, - enable_automatic_punctuation=True) - print(f"→ Cheetah v{cheetah.version}") - - pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) - dialog = pllm.get_dialog() - print(f"→ picoLLM v{pllm.version} <{pllm.model}>") - - main_connection, orca_process_connection = Pipe() - orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) - orca_process.start() - while not main_connection.poll(): - time.sleep(0.01) - print(f"→ Orca v{main_connection.recv()['version']}") - - mic = PvRecorder(frame_length=porcupine.frame_length) - mic.start() - - print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - - stop = [False] + main_queue = Queue() + listen_queue = Queue() + generate_queue = Queue() + speak_queue = Queue() + + listen_process = Process(target=listen_worker, args=( + main_queue, + listen_queue, + access_key, + keyword_model_path, + cheetah_endpoint_duration_sec + )) + generate_process = Process(target=generate_worker, args=( + main_queue, + generate_queue, + access_key, + picollm_model_path, + picollm_device, + picollm_completion_token_limit, + picollm_presence_penalty, + picollm_frequency_penalty, + picollm_temperature, + picollm_top_p, + short_answers + )) + speak_process = Process(target=speak_worker, args=( + main_queue, + speak_queue, + access_key, + orca_warmup_sec + )) def handler(_, __) -> None: - stop[0] = True + main_queue.put({'command': Commands.CLOSE}) signal.signal(signal.SIGINT, handler) - def llm_task(dialog, user_request, utterance_end_sec, main_connection): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. {user_request}" if short_answers else user_request) - - picollm_profiler = TPSProfiler() - - stop_phrases = { - '', # Llama-2, Mistral, and Mixtral - '', # Gemma - '<|endoftext|>', # Phi-2 - '<|eot_id|>', # Llama-3 - '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 - } - - completion = CompletionText(stop_phrases) - - def llm_callback(text: str) -> None: - picollm_profiler.tock() - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0: - main_connection.send({ - 'command': 'synthesize', - 'text': new_tokens.replace('\n', ' . 
'), - 'utterance_end_sec': utterance_end_sec}) - print(f'{new_tokens}', end='', flush=True) - - print( - f"\nLLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", - end='', - flush=True) - res = pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=picollm_completion_token_limit, - stop_phrases=stop_phrases, - presence_penalty=picollm_presence_penalty, - frequency_penalty=picollm_frequency_penalty, - temperature=picollm_temperature, - top_p=picollm_top_p, - stream_callback=llm_callback) - - if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - main_connection.send({'command': 'interrupt'}) - else: - main_connection.send({'command': 'flush'}) - - print('\n') - dialog.add_llm_response(res.completion) + generate_process.start() + listen_process.start() + speak_process.start() - if profile: - print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") - - while not main_connection.poll(): - time.sleep(0.01) - message = main_connection.recv() - if profile: - print(f"[Orca RTF: {message['rtf']:.2f}]") - print(f"[Delay: {message['delay']:.2f} sec]") - while not main_connection.poll(): - time.sleep(0.01) - assert main_connection.recv()['done'] - - return res - - wake_word_detected = False - user_request = '' - endpoint_reached = False - - porcupine_profiler = RTFProfiler(porcupine.sample_rate) - cheetah_profiler = RTFProfiler(cheetah.sample_rate) + modules = [ + 'Porcupine', + 'Cheetah', + 'PvRecorder', + 'picoLLM', + 'Orca', + 'PvSpeaker' + ] try: - while True: - if stop[0]: - break - elif not wake_word_detected: - pcm = mic.read() - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - if profile: - print(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") - print("$ Wake word detected, utter your request or question ...\n") - print("User > ", end='', flush=True) - elif not endpoint_reached: - pcm = mic.read() - cheetah_profiler.tick() - partial_transcript, endpoint_reached = cheetah.process(pcm) - cheetah_profiler.tock(pcm) - print(partial_transcript, end='', flush=True) - user_request += partial_transcript - if endpoint_reached: - utterance_end_sec = time.perf_counter() - cheetah_profiler.tick() - remaining_transcript = cheetah.flush() - cheetah_profiler.tock() - user_request += remaining_transcript - print(remaining_transcript, end='\n') - if profile: - print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") - with concurrent.futures.ThreadPoolExecutor() as executor: - llm_future = executor.submit( - llm_task, - dialog, - user_request, - utterance_end_sec, - main_connection) - - while not llm_future.done(): - pcm = mic.read() - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - pllm.interrupt() - break - - llm_result = llm_future.result() - if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - wake_word_detected = True - print("$ Wake word detected, utter your request or question ...\n") - print("User > ", end='', flush=True) - else: - wake_word_detected = False - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - user_request = '' - endpoint_reached = False - + close = False + generating = False + while not close: + while main_queue.empty(): + time.sleep(0.01) + + message = main_queue.get(block=True) + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == 'init': + print(f"→ {message['name']} 
v{message['version']}") + modules.remove(message['name']) + if len(modules) == 0: + main_queue.put({'command': Commands.START}) + listen_queue.put({'command': Commands.START}) + generate_queue.put({'command': Commands.START}) + speak_queue.put({'command': Commands.START}) + elif message['command'] == Commands.START: + print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") + elif message['command'] == Commands.INTERRUPT: + if generating: + print() + generating = False + print("$ Wake word detected, utter your request or question ...") + print("User > ", end='', flush=True) + generate_queue.put(message) + speak_queue.put(message) + elif message['command'] == Commands.TEXT: + print(message['text'], end='', flush=True) + elif message['command'] == Commands.GENERATE: + print() + generate_queue.put(message) + elif message['command'] == Commands.SYNTHESIZE_START: + print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) + speak_queue.put(message) + generating = True + elif message['command'] == Commands.SYNTHESIZE: + print(message['text'], end='', flush=True) + speak_queue.put(message) + elif message['command'] == Commands.SYNTHESIZE_FLUSH: + print() + speak_queue.put(message) + generating = False + elif message['command'] == Commands.PROFILE: + if profile: + print(message['text']) finally: - main_connection.send({'command': 'close'}) - mic.delete() - pllm.release() - cheetah.delete() - porcupine.delete() - orca_process.join() + generate_queue.put({'command': Commands.INTERRUPT}) + speak_queue.put({'command': Commands.INTERRUPT}) + + listen_queue.put({'command': Commands.CLOSE}) + generate_queue.put({'command': Commands.CLOSE}) + speak_queue.put({'command': Commands.CLOSE}) + + listen_process.join() + generate_process.join() + speak_process.join() if __name__ == '__main__': - main() + main() \ No newline at end of file From e5d66174b8403d3c9c5568c4ca3e65916b48a7f7 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 5 Dec 2024 11:25:37 -0800 Subject: [PATCH 2/9] Improved state management --- recipes/llm-voice-assistant/python/main.py | 38 +++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ccb1df5..d059be2 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -15,6 +15,7 @@ class Commands: + INIT = 'init' CLOSE = 'close' START = 'start' INTERRUPT = 'interrupt' @@ -41,7 +42,10 @@ def tock(self, audio: Optional[Sequence[int]] = None) -> None: self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0. def rtf(self) -> float: - rtf = self._compute_sec / self._audio_sec + if self._audio_sec > 0: + rtf = self._compute_sec / self._audio_sec + else: + rtf = 0 self._compute_sec = 0. self._audio_sec = 0. 
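+        # Guarding the division above keeps rtf() safe to call before any audio
+        # has been profiled; the accumulators are then reset for the next interval.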
return rtf @@ -113,7 +117,7 @@ def handler(_, __) -> None: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) porcupine_profiler = RTFProfiler(porcupine.sample_rate) - main_queue.put({'command': 'init', 'name': 'Porcupine', 'version': porcupine.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Porcupine', 'version': porcupine.version}) cheetah = pvcheetah.create( access_key=access_key, @@ -121,16 +125,15 @@ def handler(_, __) -> None: enable_automatic_punctuation=True) cheetah_profiler = RTFProfiler(cheetah.sample_rate) - main_queue.put({'command': 'init', 'name': 'Cheetah', 'version': cheetah.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Cheetah', 'version': cheetah.version}) mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - main_queue.put({'command': 'init', 'name': 'PvRecorder', 'version': mic.version}) + main_queue.put({'command': Commands.INIT, 'name': 'PvRecorder', 'version': mic.version}) while listen_queue.empty(): time.sleep(0.01) - listen_queue.get() try: close = False @@ -158,8 +161,9 @@ def handler(_, __) -> None: cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) - user_request += partial_transcript - main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) + if len(partial_transcript) > 0: + user_request += partial_transcript + main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) if endpoint_reached: utterance_end_sec = time.perf_counter() cheetah_profiler.tick() @@ -188,7 +192,7 @@ def handler(_, __) -> None: dialog = pllm.get_dialog() generating = False - main_queue.put({'command': 'init', 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) + main_queue.put({'command': Commands.INIT, 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) stop_phrases = { '', # Llama-2, Mistral, and Mixtral @@ -238,7 +242,6 @@ def llm_task(user_request, utterance_end_sec): while generate_queue.empty(): time.sleep(0.01) - generate_queue.get() try: close = False @@ -284,15 +287,14 @@ def handler(_, __) -> None: orca_profiler = RTFProfiler(orca.sample_rate) warmup_size = int(warmup_sec * orca.sample_rate) - main_queue.put({'command': 'init', 'name': 'Orca', 'version': orca.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Orca', 'version': orca.version}) - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=1) - main_queue.put({'command': 'init', 'name': 'PvSpeaker', 'version': speaker.version}) + main_queue.put({'command': Commands.INIT, 'name': 'PvSpeaker', 'version': speaker.version}) while speak_queue.empty(): time.sleep(0.01) - speak_queue.get() try: close = False @@ -350,7 +352,7 @@ def handler(_, __) -> None: synthesizing = False if pcm is not None: pcm_queue.extend(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if not speaking and len(pcm_queue) > warmup_size: speaker.start() @@ -508,6 +510,7 @@ def handler(_, __) -> None: try: close = False + listening = False generating = False while not close: while main_queue.empty(): @@ -516,7 +519,7 @@ def handler(_, __) -> None: message = main_queue.get(block=True) if message['command'] == 
Commands.CLOSE: close = True - elif message['command'] == 'init': + elif message['command'] == Commands.INIT: print(f"→ {message['name']} v{message['version']}") modules.remove(message['name']) if len(modules) == 0: @@ -525,7 +528,8 @@ def handler(_, __) -> None: generate_queue.put({'command': Commands.START}) speak_queue.put({'command': Commands.START}) elif message['command'] == Commands.START: - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") + if not listening: + print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") elif message['command'] == Commands.INTERRUPT: if generating: print() @@ -534,11 +538,13 @@ def handler(_, __) -> None: print("User > ", end='', flush=True) generate_queue.put(message) speak_queue.put(message) + listening = True elif message['command'] == Commands.TEXT: print(message['text'], end='', flush=True) elif message['command'] == Commands.GENERATE: print() generate_queue.put(message) + listening = False elif message['command'] == Commands.SYNTHESIZE_START: print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) speak_queue.put(message) From c05a53a31eb0121d5fb0fda43ff20b3edeab4da5 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 5 Dec 2024 11:33:35 -0800 Subject: [PATCH 3/9] fixed codestyle and spellcheck --- recipes/llm-voice-assistant/python/main.py | 39 +++++++++++++++++----- res/.lint/spell-check/dict.txt | 1 + 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index d059be2..ae71166 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -155,7 +155,9 @@ def handler(_, __) -> None: porcupine_profiler.tock(pcm) if wake_word_detected: listening = True - main_queue.put({'command': Commands.PROFILE, 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) main_queue.put({'command': Commands.INTERRUPT}) else: cheetah_profiler.tick() @@ -171,8 +173,13 @@ def handler(_, __) -> None: cheetah_profiler.tock(pcm) user_request += remaining_transcript main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) - main_queue.put({'command': Commands.GENERATE, 'text': user_request, 'utterance_end_sec': utterance_end_sec}) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) + main_queue.put({ + 'command': Commands.GENERATE, + 'text': user_request, + 'utterance_end_sec': utterance_end_sec}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) user_request = '' listening = False finally: @@ -181,7 +188,18 @@ def handler(_, __) -> None: mic.delete() -def generate_worker(main_queue, generate_queue, access_key, picollm_model_path, picollm_device, picollm_completion_token_limit, picollm_presence_penalty, picollm_frequency_penalty, picollm_temperature, picollm_top_p, short_answers): +def generate_worker( + main_queue, + generate_queue, + access_key, + picollm_model_path, + picollm_device, + picollm_completion_token_limit, + picollm_presence_penalty, + picollm_frequency_penalty, + picollm_temperature, + picollm_top_p, + short_answers): def handler(_, __) -> None: main_queue.put({'command': Commands.CLOSE}) @@ -324,7 +342,9 @@ def handler(_, __) -> None: 
orca_profiler.tick() pcm = orca_stream.flush() orca_profiler.tock(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if speaking: speaker.stop() text_queue.clear() @@ -352,7 +372,9 @@ def handler(_, __) -> None: synthesizing = False if pcm is not None: pcm_queue.extend(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if not speaking and len(pcm_queue) > warmup_size: speaker.start() @@ -546,7 +568,8 @@ def handler(_, __) -> None: generate_queue.put(message) listening = False elif message['command'] == Commands.SYNTHESIZE_START: - print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) + wake_word = '`Picovoice`' if keyword_model_path is None else 'the wake word' + print(f"LLM (say {wake_word} to interrupt) > ", end='', flush=True) speak_queue.put(message) generating = True elif message['command'] == Commands.SYNTHESIZE: @@ -573,4 +596,4 @@ def handler(_, __) -> None: if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index b654661..7d553f8 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -24,6 +24,7 @@ picollm picovoice pids pllm +popleft psutil pvcheetah pvorca From 4d9dc0ed5157c06df140e59253261abdd5e8b77b Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 27 Dec 2024 15:37:04 -0800 Subject: [PATCH 4/9] updated to use same code as windows_gui demo --- recipes/llm-voice-assistant/python/main.py | 894 +++++++++++---------- 1 file changed, 474 insertions(+), 420 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ae71166..2be1b65 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -1,11 +1,16 @@ +import json +import os import signal -import concurrent.futures +import sys import time from argparse import ArgumentParser -from collections import deque -from multiprocessing import Process, Queue +from concurrent.futures import ThreadPoolExecutor +from itertools import chain +from multiprocessing import Event, Pipe, Process, Queue, active_children +from multiprocessing.connection import Connection from typing import Optional, Sequence + import picollm import pvcheetah import pvorca @@ -15,16 +20,13 @@ class Commands: - INIT = 'init' - CLOSE = 'close' START = 'start' - INTERRUPT = 'interrupt' - TEXT = 'text' - GENERATE = 'generate' - SYNTHESIZE_START = 'synthesize-start' + CLOSE = 'close' + PROCESS = 'process' SYNTHESIZE = 'synthesize' - SYNTHESIZE_FLUSH = 'synthesize-flush' - PROFILE = 'profile' + SPEAK = 'speak' + FLUSH = 'flush' + INTERRUPT = 'interrupt' class RTFProfiler: @@ -105,307 +107,438 @@ def get_new_tokens(self) -> str: return self.new_tokens -def listen_worker(main_queue, listen_queue, access_key, keyword_model_path, cheetah_endpoint_duration_sec): - def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - - signal.signal(signal.SIGINT, handler) - - if keyword_model_path is None: - porcupine = 
pvporcupine.create(access_key=access_key, keywords=['picovoice']) - else: - porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - porcupine_profiler = RTFProfiler(porcupine.sample_rate) - - main_queue.put({'command': Commands.INIT, 'name': 'Porcupine', 'version': porcupine.version}) - - cheetah = pvcheetah.create( - access_key=access_key, - endpoint_duration_sec=cheetah_endpoint_duration_sec, - enable_automatic_punctuation=True) - cheetah_profiler = RTFProfiler(cheetah.sample_rate) - - main_queue.put({'command': Commands.INIT, 'name': 'Cheetah', 'version': cheetah.version}) - - mic = PvRecorder(frame_length=porcupine.frame_length) - mic.start() - - main_queue.put({'command': Commands.INIT, 'name': 'PvRecorder', 'version': mic.version}) +class Speaker: + def __init__( + self, + speaker: PvSpeaker, + orca_warmup_sec: int): + self.speaker = speaker + self.orca_warmup = self.speaker.sample_rate * orca_warmup_sec + self.started = False + self.speaking = False + self.flushing = False + self.pcmBuffer = [] + self.executor = ThreadPoolExecutor() + self.future = None + + def close(self): + self.executor.shutdown() + + def start(self): + self.started = True + + def process(self, pcm: Optional[Sequence[int]]): + if self.started and pcm is not None: + self.pcmBuffer.extend(pcm) + + def flush(self): + self.flushing = True + + def interrupt(self): + self.started = False + if self.speaking: + self.speaking = False + self.flushing = False + self.pcmBuffer.clear() + self.speaker.stop() + + def tick(self): + def stop(): + self.speaker.flush() + self.speaker.stop() + if not self.speaking and len(self.pcmBuffer) > self.orca_warmup: + self.speaking = True + self.speaker.start() + if self.speaking and len(self.pcmBuffer) > 0: + written = self.speaker.write(self.pcmBuffer) + if written > 0: + del self.pcmBuffer[:written] + elif self.speaking and self.flushing and len(self.pcmBuffer) == 0: + self.started = False + self.speaking = False + self.flushing = False + self.future = self.executor.submit(stop) + if self.future and self.future.done(): + self.future = None + ppn_prompt = config['ppn_prompt'] + print(f'$ Say {ppn_prompt} ...', flush=True) + + +class Synthesizer: + def __init__( + self, + speaker: Speaker, + orca_connection: Connection, + orca_process: Process): + self.speaker = speaker + self.orca_connection = orca_connection + self.orca_process = orca_process + + def close(self): + self.orca_connection.send({'command': Commands.CLOSE}) + self.orca_process.join() + + def start(self): + self.speaker.start() + self.orca_connection.send({'command': Commands.START}) + + def process(self, text: str): + self.orca_connection.send({'command': Commands.PROCESS, 'text': text}) + + def flush(self): + self.orca_connection.send({'command': Commands.FLUSH}) + + def interrupt(self): + self.orca_connection.send({'command': Commands.INTERRUPT}) + while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: + time.sleep(0.01) + self.speaker.interrupt() + + def tick(self): + while self.orca_connection.poll(): + message = self.orca_connection.recv() + if message['command'] == Commands.SPEAK: + self.speaker.process(message['pcm']) + elif message['command'] == Commands.FLUSH: + self.speaker.flush() + + @staticmethod + def create_worker(config): + main_connection, process_connection = Pipe() + process = Process(target=Synthesizer.worker, args=(process_connection, config)) + process.start() + return main_connection, process + + @staticmethod + def 
worker(connection: Connection, config):
+        def handler(_, __) -> None:
+            pass
+        signal.signal(signal.SIGINT, handler)
+
+        orca = pvorca.create(access_key=config['access_key'])
+        orca_stream = orca.stream_open()
+        connection.send(orca.sample_rate)
+
+        try:
+            close = False
+            synthesizing = False
+            flushing = False
+            text_queue = Queue()
+            while not close:
+                while connection.poll():
+                    message = connection.recv()
+                    if message['command'] == Commands.CLOSE:
+                        close = True
+                    elif message['command'] == Commands.START:
+                        synthesizing = True
+                    elif message['command'] == Commands.PROCESS:
+                        if synthesizing:
+                            text_queue.put(message['text'])
+                    elif message['command'] == Commands.FLUSH:
+                        flushing = True
+                    elif message['command'] == Commands.INTERRUPT:
+                        synthesizing = False
+                        flushing = False
+                        while not text_queue.empty():
+                            text_queue.get()
+                        orca_stream.flush()
+                        connection.send({'command': Commands.INTERRUPT})
+                if not text_queue.empty():
+                    text = text_queue.get()
+                    pcm = orca_stream.synthesize(text)
+                    if pcm is not None:
+                        connection.send({'command': Commands.SPEAK, 'pcm': pcm})
+                if synthesizing and flushing and text_queue.empty():
+                    synthesizing = False
+                    flushing = False
+                    pcm = orca_stream.flush()
+                    connection.send({'command': Commands.SPEAK, 'pcm': pcm})
+                    connection.send({'command': Commands.FLUSH})
+                elif flushing:
+                    flushing = False
+        finally:
+            orca_stream.close()
+            orca.delete()
+
+
+class Generator:
+    def __init__(
+            self,
+            synthesizer: Synthesizer,
+            pllm_connection: Connection,
+            pllm_process: Process):
+        self.synthesizer = synthesizer
+        self.pllm_connection = pllm_connection
+        self.pllm_process = pllm_process
+
+    def close(self):
+        self.pllm_connection.send({'command': Commands.CLOSE})
+        self.pllm_process.join()
+
+    def process(self, text: str):
+        ppn_prompt = config['ppn_prompt']
+        print(f'LLM (say {ppn_prompt} to interrupt) > ', end='', flush=True)
+
+        self.synthesizer.start()
+        self.pllm_connection.send({'command': Commands.PROCESS, 'text': text})
+
+    def interrupt(self):
+        self.pllm_connection.send({'command': Commands.INTERRUPT})
+        while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT:
+            time.sleep(0.01)
+        print('', flush=True)
+        self.synthesizer.interrupt()

-    while listen_queue.empty():
-        time.sleep(0.01)

+    def tick(self):
+        while self.pllm_connection.poll():
+            message = self.pllm_connection.recv()
+            if message['command'] == Commands.SYNTHESIZE:
+                print(message['text'], end='', flush=True)
+                self.synthesizer.process(message['text'])
+            elif message['command'] == Commands.FLUSH:
+                print('', flush=True)
+                self.synthesizer.flush()
+
+    @staticmethod
+    def create_worker(config):
+        main_connection, process_connection = Pipe()
+        process = Process(target=Generator.worker, args=(process_connection, config))
+        process.start()
+        return main_connection, process
+
+    @staticmethod
+    def worker(connection: Connection, config):
+        def handler(_, __) -> None:
+            pass
+        signal.signal(signal.SIGINT, handler)
+
+        pllm = picollm.create(
+            access_key=config['access_key'],
+            model_path=config['picollm_model_path'],
+            device=config['picollm_device'])
+        if config['picollm_system_prompt'] is not None:
+            dialog = pllm.get_dialog(system=config['picollm_system_prompt'])
+        else:
+            dialog = pllm.get_dialog()
+        generating = False

         stop_phrases = {
             '', # Llama-2, Mistral, and Mixtral
             '', # Gemma
             '<|endoftext|>', # Phi-2
             '<|eot_id|>', # Llama-3
             '<|end|>', '<|user|>', '<|assistant|>', # Phi-3
+        }
+        completion = CompletionText(stop_phrases)
+
+        def llm_callback(text):
+            if generating:
+                completion.append(text)
+                new_tokens = completion.get_new_tokens()
+                if len(new_tokens) > 0:
+                    connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens})
+
+        def llm_task(text):
+            short_answers_instruction = \
+                "You are a voice assistant and your answers are very short but informative"
+            dialog.add_human_request(
+                f"{short_answers_instruction}. {text}" if config['short_answers'] else text)
+
+            completion.reset()
+            return pllm.generate(
+                prompt=dialog.prompt(),
+                completion_token_limit=config['picollm_completion_token_limit'],
+                stop_phrases=stop_phrases,
+                presence_penalty=config['picollm_presence_penalty'],
+                frequency_penalty=config['picollm_frequency_penalty'],
+                temperature=config['picollm_temperature'],
+                top_p=config['picollm_top_p'],
+                stream_callback=llm_callback)
+
+        try:
+            close = False
+            executor = ThreadPoolExecutor()
+            llm_future = None
+            interrupting = False
+            while not close:
+                while connection.poll():
+                    message = connection.recv()
+                    if message['command'] == Commands.CLOSE:
+                        close = True
+                    elif message['command'] == Commands.PROCESS:
+                        generating = True
+                        text = message['text']
+                        llm_future = executor.submit(llm_task, text)
+                    elif message['command'] == Commands.INTERRUPT:
+                        interrupting = True
+                        generating = False
+                        pllm.interrupt()
+                if llm_future and llm_future.done():
+                    generating = False
+                    llm_result = llm_future.result()
+                    dialog.add_llm_response(llm_result.completion)
+                    if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+                        interrupting = False
+                        connection.send({'command': Commands.INTERRUPT})
+                    else:
+                        connection.send({'command': Commands.FLUSH})
+                    llm_future = None
+                if not llm_future and interrupting:
+                    interrupting = False
+                    connection.send({'command': Commands.INTERRUPT})
+        finally:
+            # Wait for any in-flight generation to finish before releasing picoLLM.
+            while llm_future and not llm_future.done():
+                time.sleep(0.01)
+            del executor
+            pllm.release()
+
+
+class Listener:
+    def __init__(
+            self,
+            generator: Generator,
+            porcupine: pvporcupine.Porcupine,
+            cheetah: pvcheetah.Cheetah):
+        self.generator = generator
+        self.porcupine = porcupine
+        self.cheetah = cheetah
+
+        self.sleeping = True
+        self.listening = False
+        self.user_request = ''
+        self.tick_count = 0
+
+    def close(self):
+        pass
+
+    def process(self, pcm: Optional[Sequence[int]]):
+        if self.sleeping:
+            if self.porcupine.process(pcm) == 0:
+                self.sleeping = False
+                self.tick_count = 4
+                self.generator.interrupt()
+        elif self.listening:
+            partial_transcript, endpoint_reached = self.cheetah.process(pcm)
+            if len(partial_transcript) > 0:
+                self.user_request += partial_transcript
+                print(partial_transcript, end='', flush=True)
+            if endpoint_reached:
+                self.sleeping = True
+                self.listening = False
+                remaining_transcript = self.cheetah.flush()
+                if len(remaining_transcript) > 0:
+                    self.user_request += remaining_transcript
+                print(remaining_transcript, flush=True)
+                self.generator.process(self.user_request)
+                self.user_request = ''
+        elif self.tick_count > 0:
+            self.tick_count -= 1
+        else:
+            self.listening = True
+            print('$ Wake word detected, utter your request or question ...', flush=True)
+            print('User > ', end='', flush=True)
+
+
+class Recorder:
+    def __init__(
+            self,
+            listener: Listener,
+            recorder: PvRecorder):
+        self.listener = listener
+        self.recorder = recorder
+        self.recording = False
+
+    def close(self):
+        if self.recording:
+            self.recorder.stop()
+
+    def tick(self):
+        if not self.recording:
+            self.recording = 
True + self.recorder.start() + pcm = self.recorder.read() + self.listener.process(pcm) + +def main(config): + stop = [False] - while not listen_queue.empty(): - message = listen_queue.get() - if message['command'] == Commands.CLOSE: - close = True - - pcm = mic.read() - if not listening: - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - listening = True - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) - main_queue.put({'command': Commands.INTERRUPT}) - else: - cheetah_profiler.tick() - partial_transcript, endpoint_reached = cheetah.process(pcm) - cheetah_profiler.tock(pcm) - if len(partial_transcript) > 0: - user_request += partial_transcript - main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) - if endpoint_reached: - utterance_end_sec = time.perf_counter() - cheetah_profiler.tick() - remaining_transcript = cheetah.flush() - cheetah_profiler.tock(pcm) - user_request += remaining_transcript - main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) - main_queue.put({ - 'command': Commands.GENERATE, - 'text': user_request, - 'utterance_end_sec': utterance_end_sec}) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) - user_request = '' - listening = False - finally: - porcupine.delete() - cheetah.delete() - mic.delete() - - -def generate_worker( - main_queue, - generate_queue, - access_key, - picollm_model_path, - picollm_device, - picollm_completion_token_limit, - picollm_presence_penalty, - picollm_frequency_penalty, - picollm_temperature, - picollm_top_p, - short_answers): def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - + stop[0] = True signal.signal(signal.SIGINT, handler) - pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) - pllm_profiler = TPSProfiler() - dialog = pllm.get_dialog() - generating = False - - main_queue.put({'command': Commands.INIT, 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) - - stop_phrases = { - '', # Llama-2, Mistral, and Mixtral - '', # Gemma - '<|endoftext|>', # Phi-2 - '<|eot_id|>', # Llama-3 - '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 - } - - completion = CompletionText(stop_phrases) + pllm_connection, pllm_process = Generator.create_worker(config) + orca_connection, orca_process = Synthesizer.create_worker(config) - def llm_callback(text: str): - pllm_profiler.tock() - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0 and generating: - main_queue.put({'command': Commands.SYNTHESIZE, 'text': new_tokens}) - - def llm_task(user_request, utterance_end_sec): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. 
{user_request}" if short_answers else user_request) - - main_queue.put({'command': Commands.SYNTHESIZE_START, 'utterance_end_sec': utterance_end_sec}) - - res = pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=picollm_completion_token_limit, - stop_phrases=stop_phrases, - presence_penalty=picollm_presence_penalty, - frequency_penalty=picollm_frequency_penalty, - temperature=picollm_temperature, - top_p=picollm_top_p, - stream_callback=llm_callback) - - dialog.add_llm_response(res.completion) - - if res.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: - main_queue.put({'command': Commands.SYNTHESIZE_FLUSH}) + if 'keyword_model_path' not in config: + porcupine = pvporcupine.create( + access_key=config['access_key'], + keywords=['picovoice'], + sensitivities=[config['porcupine_sensitivity']]) + config['ppn_prompt'] = '`Picovoice`' + else: + porcupine = pvporcupine.create( + access_key=config['access_key'], + keyword_paths=[config['keyword_model_path']], + sensitivities=[config['porcupine_sensitivity']]) + config['ppn_prompt'] = 'the wake word' - main_queue.put({'command': Commands.PROFILE, 'text': f"[picoLLM TPS: {pllm_profiler.tps():.2f}]"}) + cheetah = pvcheetah.create( + access_key=config['access_key'], + endpoint_duration_sec=config['cheetah_endpoint_duration_sec'], + enable_automatic_punctuation=True) - return res + pv_recorder = PvRecorder(frame_length=porcupine.frame_length) + pv_speaker = PvSpeaker(sample_rate=int(orca_connection.recv()), bits_per_sample=16, buffer_size_secs=1) - executor = concurrent.futures.ThreadPoolExecutor() + speaker = Speaker(pv_speaker, config['orca_warmup_sec']) + synthesizer = Synthesizer(speaker, orca_connection, orca_process) + generator = Generator(synthesizer, pllm_connection, pllm_process) + listener = Listener(generator, porcupine, cheetah) + recorder = Recorder(listener, pv_recorder) - while generate_queue.empty(): - time.sleep(0.01) + ppn_prompt = config['ppn_prompt'] + print(f'$ Say {ppn_prompt} ...', flush=True) try: - close = False - llm_future = None - while not close: - if generate_queue.empty(): - time.sleep(0.01) - - while not generate_queue.empty(): - message = generate_queue.get() - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.GENERATE: - generating = True - completion.reset() - llm_future = executor.submit( - llm_task, - message['text'], - message['utterance_end_sec']) - elif message['command'] == Commands.INTERRUPT and generating: - generating = False - pllm.interrupt() - - if llm_future and llm_future.done(): - llm_future = None - generating = False + while not stop[0]: + recorder.tick() + generator.tick() + synthesizer.tick() + speaker.tick() finally: - while llm_future and not llm_future.done(): - time.sleep(0.01) - - executor.shutdown(True) - pllm.release() - - -def speak_worker(main_queue, speak_queue, access_key, warmup_sec): - def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - - signal.signal(signal.SIGINT, handler) - - orca = pvorca.create(access_key=access_key) - orca_stream = orca.stream_open() - orca_profiler = RTFProfiler(orca.sample_rate) - warmup_size = int(warmup_sec * orca.sample_rate) + generator.interrupt() + generator.tick() + synthesizer.tick() + speaker.tick() + recorder.close() + listener.close() + generator.close() + synthesizer.close() + speaker.close() + + for child in active_children(): + child.terminate() - main_queue.put({'command': Commands.INIT, 'name': 'Orca', 'version': orca.version}) - - speaker = 
PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=1) - - main_queue.put({'command': Commands.INIT, 'name': 'PvSpeaker', 'version': speaker.version}) - - while speak_queue.empty(): - time.sleep(0.01) - - try: - close = False - synthesizing = False - speaking = False - flush = False - text_queue = deque() - pcm_queue = list() - delay_sec = -1 - utterance_end_sec = 0 - while not close: - if speak_queue.empty(): - time.sleep(0.01) + porcupine.delete() + cheetah.delete() + pv_recorder.delete() + pv_speaker.delete() - while not speak_queue.empty(): - message = speak_queue.get() - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.SYNTHESIZE_START: - synthesizing = True - utterance_end_sec = message['utterance_end_sec'] - delay_sec = -1 - elif message['command'] == Commands.SYNTHESIZE: - text_queue.append(message['text'].replace('\n', ' . ')) - elif message['command'] == Commands.INTERRUPT: - if synthesizing: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) - if speaking: - speaker.stop() - text_queue.clear() - pcm_queue.clear() - synthesizing = False - speaking = False - flush = False - elif message['command'] == Commands.SYNTHESIZE_FLUSH: - flush = True - - while len(text_queue) > 0: - text = text_queue.popleft() - orca_profiler.tick() - pcm = orca_stream.synthesize(text) - orca_profiler.tock(pcm) - if pcm is not None: - if delay_sec == -1: - delay_sec = time.perf_counter() - utterance_end_sec - pcm_queue.extend(pcm) - - if flush and synthesizing: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - synthesizing = False - if pcm is not None: - pcm_queue.extend(pcm) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) - - if not speaking and len(pcm_queue) > warmup_size: - speaker.start() - speaking = True - - if speaking and len(pcm_queue) > 0: - written = speaker.write(pcm_queue) - if written > 0: - del pcm_queue[:written] - - if speaking and flush and len(pcm_queue) == 0: - speaker.flush(pcm_queue) - speaker.stop() - speaking = False - flush = False - main_queue.put({'command': Commands.START}) - finally: - orca_stream.close() - orca.delete() - speaker.delete() +if __name__ == '__main__': + if not sys.platform.lower().startswith('win'): + print('Error: Only runs on Windows platforms') + exit(1) -def main() -> None: parser = ArgumentParser() + parser.add_argument( + '--config', + help='path to a json config file to load the arguments from') parser.add_argument( '--access_key', - required=True, help='`AccessKey` obtained from `Picovoice Console` (https://console.picovoice.ai/).') parser.add_argument( '--picollm_model_path', - required=True, help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( '--keyword-model_path', @@ -413,7 +546,6 @@ def main() -> None: parser.add_argument( '--cheetah_endpoint_duration_sec', type=float, - default=1., help="Duration of silence (pause) after the user's utterance to consider it the end of the utterance.") parser.add_argument( '--picollm_device', @@ -426,24 +558,20 @@ def main() -> None: parser.add_argument( '--picollm_completion_token_limit', type=int, - default=256, help="Maximum number of tokens in the completion. 
Set to `None` to impose no limit.")
     parser.add_argument(
         '--picollm_presence_penalty',
         type=float,
-        default=0.,
         help="It penalizes logits already appearing in the partial completion if set to a positive value. If set to "
              "`0.0`, it has no effect.")
     parser.add_argument(
         '--picollm_frequency_penalty',
         type=float,
-        default=0.,
         help="If set to a positive floating-point value, it penalizes logits proportional to the frequency of their "
              "appearance in the partial completion. If set to `0.0`, it has no effect.")
     parser.add_argument(
         '--picollm_temperature',
         type=float,
-        default=0.,
         help="Sampling temperature. Temperature is a non-negative floating-point value that controls the randomness of "
              "the sampler. A higher temperature smoothens the samplers' output, increasing the randomness. In "
              "contrast, a lower temperature creates a narrower distribution and reduces variability. Setting it to "
              "`0` selects the maximum logit during sampling.")
     parser.add_argument(
         '--picollm_top_p',
         type=float,
-        default=1.,
         help="A positive floating-point number within (0, 1]. It restricts the sampler's choices to high-probability "
              "logits that form the `top_p` portion of the probability mass. Hence, it avoids randomly selecting "
              "unlikely logits. A value of `1.` enables the sampler to pick any token with non-zero probability, "
              "turning off the feature.")
+    parser.add_argument(
+        '--picollm_system_prompt',
+        type=str,
+        help="A text prompt to give to the LLM prior to its input to instruct it on how to behave."
+    )
     parser.add_argument(
         '--orca_warmup_sec',
         type=float,
-        default=0.,
         help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower "
              "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.")
-    parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
+    parser.add_argument(
+        '--porcupine_sensitivity',
+        type=float,
+        help="Sensitivity for detecting keywords.")
     parser.add_argument('--short_answers', action='store_true')
+    parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
     args = parser.parse_args()

-    access_key = args.access_key
-    picollm_model_path = args.picollm_model_path
-    keyword_model_path = args.keyword_model_path
-    cheetah_endpoint_duration_sec = args.cheetah_endpoint_duration_sec
-    picollm_device = args.picollm_device
-    picollm_completion_token_limit = args.picollm_completion_token_limit
-    picollm_presence_penalty = args.picollm_presence_penalty
-    picollm_frequency_penalty = args.picollm_frequency_penalty
-    picollm_temperature = args.picollm_temperature
-    picollm_top_p = args.picollm_top_p
-    orca_warmup_sec = args.orca_warmup_sec
-    profile = args.profile
-    short_answers = args.short_answers
-
-    main_queue = Queue()
-    listen_queue = Queue()
-    generate_queue = Queue()
-    speak_queue = Queue()
-
-    listen_process = Process(target=listen_worker, args=(
-        main_queue,
-        listen_queue,
-        access_key,
-        keyword_model_path,
-        cheetah_endpoint_duration_sec
-    ))
-    generate_process = Process(target=generate_worker, args=(
-        main_queue,
-        generate_queue,
-        access_key,
-        picollm_model_path,
-        picollm_device,
-        picollm_completion_token_limit,
-        picollm_presence_penalty,
-        picollm_frequency_penalty,
-        picollm_temperature,
-        picollm_top_p,
-        short_answers
-    ))
-    speak_process = Process(target=speak_worker, args=(
-        main_queue,
-        speak_queue,
-        access_key,
-        orca_warmup_sec
-    ))
-
-    def handler(_, __) -> None:
-        main_queue.put({'command': Commands.CLOSE})
- - signal.signal(signal.SIGINT, handler) + if args.config is not None: + config_path = os.path.realpath(args.config) + else: + config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config.json') + + if os.path.exists(config_path): + with open(config_path, 'r') as fd: + config = json.load(fd) + elif args.config is not None: + print(parser.error(f'File {config_path} does not exist')) + exit(1) + else: + config = {} - generate_process.start() - listen_process.start() - speak_process.start() - - modules = [ - 'Porcupine', - 'Cheetah', - 'PvRecorder', - 'picoLLM', - 'Orca', - 'PvSpeaker' + REQUIRED_ARGS = [ + 'access_key', + 'picollm_model_path' ] + DEFAULT_ARGS = { + 'access_key': '', + 'picollm_model_path': '', + 'cheetah_endpoint_duration_sec': 1, + 'picollm_device': 'best', + 'picollm_completion_token_limit': 256, + 'picollm_presence_penalty': 0, + 'picollm_frequency_penalty': 0, + 'picollm_temperature': 0, + 'picollm_top_p': 1, + 'picollm_system_prompt': None, + 'orca_warmup_sec': 0, + 'porcupine_sensitivity': 0.5, + 'short_answers': False, + 'profile': False + } - try: - close = False - listening = False - generating = False - while not close: - while main_queue.empty(): - time.sleep(0.01) - - message = main_queue.get(block=True) - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.INIT: - print(f"→ {message['name']} v{message['version']}") - modules.remove(message['name']) - if len(modules) == 0: - main_queue.put({'command': Commands.START}) - listen_queue.put({'command': Commands.START}) - generate_queue.put({'command': Commands.START}) - speak_queue.put({'command': Commands.START}) - elif message['command'] == Commands.START: - if not listening: - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - elif message['command'] == Commands.INTERRUPT: - if generating: - print() - generating = False - print("$ Wake word detected, utter your request or question ...") - print("User > ", end='', flush=True) - generate_queue.put(message) - speak_queue.put(message) - listening = True - elif message['command'] == Commands.TEXT: - print(message['text'], end='', flush=True) - elif message['command'] == Commands.GENERATE: - print() - generate_queue.put(message) - listening = False - elif message['command'] == Commands.SYNTHESIZE_START: - wake_word = '`Picovoice`' if keyword_model_path is None else 'the wake word' - print(f"LLM (say {wake_word} to interrupt) > ", end='', flush=True) - speak_queue.put(message) - generating = True - elif message['command'] == Commands.SYNTHESIZE: - print(message['text'], end='', flush=True) - speak_queue.put(message) - elif message['command'] == Commands.SYNTHESIZE_FLUSH: - print() - speak_queue.put(message) - generating = False - elif message['command'] == Commands.PROFILE: - if profile: - print(message['text']) - finally: - generate_queue.put({'command': Commands.INTERRUPT}) - speak_queue.put({'command': Commands.INTERRUPT}) - - listen_queue.put({'command': Commands.CLOSE}) - generate_queue.put({'command': Commands.CLOSE}) - speak_queue.put({'command': Commands.CLOSE}) + for key in chain(REQUIRED_ARGS, DEFAULT_ARGS): + arg = getattr(args, key) + if arg is not None: + config[key] = arg - listen_process.join() - generate_process.join() - speak_process.join() + missing = [f'--{arg}' for arg in REQUIRED_ARGS if arg not in config] + if len(missing) > 0: + print(parser.error('the following arguments are required: ' + ', '.join(missing))) + exit(1) + for key in DEFAULT_ARGS: + 
if key not in config: + config[key] = DEFAULT_ARGS[key] -if __name__ == '__main__': - main() + main(config) From 84bf62c8398f518479a3cd7e314dc1058ad9f2f5 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Mon, 30 Dec 2024 12:46:08 -0800 Subject: [PATCH 5/9] updated to include print statements --- .../python/{ => cli}/README.md | 0 .../python/{ => cli}/main.py | 112 +++++++++++++++--- .../python/cli/requirements.txt | 6 + .../python/windows_gui/README.md | 45 +++++++ .../{windows_gui.py => windows_gui/main.py} | 0 .../python/{ => windows_gui}/requirements.txt | 0 6 files changed, 144 insertions(+), 19 deletions(-) rename recipes/llm-voice-assistant/python/{ => cli}/README.md (100%) rename recipes/llm-voice-assistant/python/{ => cli}/main.py (85%) create mode 100644 recipes/llm-voice-assistant/python/cli/requirements.txt create mode 100644 recipes/llm-voice-assistant/python/windows_gui/README.md rename recipes/llm-voice-assistant/python/{windows_gui.py => windows_gui/main.py} (100%) rename recipes/llm-voice-assistant/python/{ => windows_gui}/requirements.txt (100%) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/cli/README.md similarity index 100% rename from recipes/llm-voice-assistant/python/README.md rename to recipes/llm-voice-assistant/python/cli/README.md diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/cli/main.py similarity index 85% rename from recipes/llm-voice-assistant/python/main.py rename to recipes/llm-voice-assistant/python/cli/main.py index 2be1b65..50bb1c8 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/cli/main.py @@ -52,6 +52,11 @@ def rtf(self) -> float: self._audio_sec = 0. return rtf + def reset(self) -> None: + self._compute_sec = 0. + self._audio_sec = 0. + self._tick_sec = 0. + class TPSProfiler(object): def __init__(self) -> None: @@ -70,6 +75,10 @@ def tps(self) -> float: self._start_sec = 0. return tps + def reset(self) -> None: + self._num_tokens = 0 + self._start_sec = 0. 
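For orientation, the `reset` methods added above complement the existing tick/tock accounting: `tick()` starts a compute timer, `tock()` accumulates the elapsed compute time along with audio duration (RTF) or token count (TPS), and `rtf()`/`tps()` report the ratio. A minimal usage sketch, assuming an open Cheetah handle and an audio frame `pcm` as elsewhere in this recipe:

```python
# Sketch of the real-time-factor accounting the reset() methods support.
profiler = RTFProfiler(cheetah.sample_rate)

profiler.tick()                    # start the compute timer
partial_transcript, endpoint_reached = cheetah.process(pcm)
profiler.tock(pcm)                 # add compute time and audio duration

print(f'[Cheetah RTF: {round(profiler.rtf(), 2)}]')  # compute_sec / audio_sec
profiler.reset()                   # clear all tallies between interactions
```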
+ class CompletionText(object): def __init__(self, stop_phrases: list) -> None: @@ -111,9 +120,10 @@ class Speaker: def __init__( self, speaker: PvSpeaker, - orca_warmup_sec: int): + config): self.speaker = speaker - self.orca_warmup = self.speaker.sample_rate * orca_warmup_sec + self.config = config + self.orca_warmup = self.speaker.sample_rate * self.config['orca_warmup_sec'] self.started = False self.speaking = False self.flushing = False @@ -160,7 +170,7 @@ def stop(): self.future = self.executor.submit(stop) if self.future and self.future.done(): self.future = None - ppn_prompt = config['ppn_prompt'] + ppn_prompt = self.config['ppn_prompt'] print(f'$ Say {ppn_prompt} ...', flush=True) @@ -169,18 +179,20 @@ def __init__( self, speaker: Speaker, orca_connection: Connection, - orca_process: Process): + orca_process: Process, + config): self.speaker = speaker self.orca_connection = orca_connection self.orca_process = orca_process + self.config = config def close(self): self.orca_connection.send({'command': Commands.CLOSE}) self.orca_process.join() - def start(self): + def start(self, utterance_end_sec): self.speaker.start() - self.orca_connection.send({'command': Commands.START}) + self.orca_connection.send({'command': Commands.START, 'utterance_end_sec': utterance_end_sec}) def process(self, text: str): self.orca_connection.send({'command': Commands.PROCESS, 'text': text}) @@ -200,6 +212,11 @@ def tick(self): if message['command'] == Commands.SPEAK: self.speaker.process(message['pcm']) elif message['command'] == Commands.FLUSH: + if self.config['profile']: + rtf = message['profile'] + delay = message['delay'] + print(f'[Orca RTF: {round(rtf, 2)}]') + print(f"[Delay: {round(delay, 2)} sec]") self.speaker.flush() @staticmethod @@ -218,6 +235,11 @@ def handler(_, __) -> None: orca = pvorca.create(access_key=config['access_key']) orca_stream = orca.stream_open() connection.send(orca.sample_rate) + connection.send({'version': orca.version}) + + orca_profiler = RTFProfiler(orca.sample_rate) + utterance_end_sec = 0 + delay_sec = -1 try: close = False @@ -231,6 +253,7 @@ def handler(_, __) -> None: close = True elif message['command'] == Commands.START: synthesizing = True + utterance_end_sec = message['utterance_end_sec'] elif message['command'] == Commands.PROCESS: if synthesizing: text_queue.put(message['text']) @@ -243,17 +266,28 @@ def handler(_, __) -> None: text_queue.get() orca_stream.flush() connection.send({'command': Commands.INTERRUPT}) + orca_profiler.reset() + utterance_end_sec = 0 + delay_sec = -1 if not text_queue.empty(): text = text_queue.get() + orca_profiler.tick() pcm = orca_stream.synthesize(text) + orca_profiler.tock(pcm) if pcm is not None: connection.send({'command': Commands.SPEAK, 'pcm': pcm}) + if delay_sec == -1: + delay_sec = time.perf_counter() - utterance_end_sec if synthesizing and flushing and text_queue.empty(): synthesizing = False flushing = False + orca_profiler.tick() pcm = orca_stream.flush() + orca_profiler.tock(pcm) connection.send({'command': Commands.SPEAK, 'pcm': pcm}) - connection.send({'command': Commands.FLUSH}) + connection.send({'command': Commands.FLUSH, 'profile': orca_profiler.rtf(), 'delay': delay_sec}) + utterance_end_sec = 0 + delay_sec = -1 elif flushing: flushing = False finally: @@ -266,20 +300,22 @@ def __init__( self, synthesizer: Synthesizer, pllm_connection: Connection, - pllm_process: Process): + pllm_process: Process, + config): self.synthesizer = synthesizer self.pllm_connection = pllm_connection self.pllm_process = pllm_process + 
self.config = config
 
     def close(self):
         self.pllm_connection.send({'command': Commands.CLOSE})
         self.pllm_process.join()
 
-    def process(self, text: str):
-        ppn_prompt = config['ppn_prompt']
+    def process(self, text: str, utterance_end_sec):
+        ppn_prompt = self.config['ppn_prompt']
         print(f'LLM (say {ppn_prompt} to interrupt) > ', end='', flush=True)
 
-        self.synthesizer.start()
+        self.synthesizer.start(utterance_end_sec)
         self.pllm_connection.send({'command': Commands.PROCESS, 'text': text})
 
     def interrupt(self):
@@ -297,6 +333,9 @@ def tick(self):
                 self.synthesizer.process(message['text'])
             elif message['command'] == Commands.FLUSH:
                 print('', flush=True)
+                if self.config['profile']:
+                    tps = message['profile']
+                    print(f'[picoLLM TPS: {round(tps, 2)}]')
                 self.synthesizer.flush()
 
     @staticmethod
@@ -316,12 +355,17 @@ def handler(_, __) -> None:
             access_key=config['access_key'],
             model_path=config['picollm_model_path'],
             device=config['picollm_device'])
+
+        connection.send({'version': pllm.version, 'model': pllm.model})
+
         if config['picollm_system_prompt'] is not None:
             dialog = pllm.get_dialog(system=config['picollm_system_prompt'])
         else:
             dialog = pllm.get_dialog()
 
         generating = False
+        pllm_profiler = TPSProfiler()
+
         stop_phrases = {
             '</s>',  # Llama-2, Mistral, and Mixtral
             '<end_of_turn>',  # Gemma
@@ -332,6 +376,7 @@ def handler(_, __) -> None:
         completion = CompletionText(stop_phrases)
 
         def llm_callback(text):
+            pllm_profiler.tock()
             if generating:
                 completion.append(text)
                 new_tokens = completion.get_new_tokens()
@@ -368,6 +413,7 @@ def llm_task(text):
             elif message['command'] == Commands.PROCESS:
                 generating = True
                 text = message['text']
+                pllm_profiler.reset()
                 llm_future = executor.submit(llm_task, text)
             elif message['command'] == Commands.INTERRUPT:
                 interrupting = True
@@ -381,7 +427,7 @@ def llm_task(text):
                 interrupting = False
                 connection.send({'command': Commands.INTERRUPT})
             else:
-                connection.send({'command': Commands.FLUSH})
+                connection.send({'command': Commands.FLUSH, 'profile': pllm_profiler.tps()})
             llm_future = None
         if not llm_future and interrupting:
             interrupting = False
@@ -398,10 +444,14 @@ def __init__(
         self,
         generator: Generator,
         porcupine: pvporcupine.Porcupine,
-        cheetah: pvcheetah.Cheetah):
+        cheetah: pvcheetah.Cheetah,
+        config):
         self.generator = generator
         self.porcupine = porcupine
         self.cheetah = cheetah
+        self.config = config
+        self.porcupine_profiler = RTFProfiler(porcupine.sample_rate)
+        self.cheetah_profiler = RTFProfiler(cheetah.sample_rate)
         self.sleeping = True
         self.listening = False
@@ -413,23 +463,37 @@ def close(self):
 
     def process(self, pcm: Optional[Sequence[int]]):
         if self.sleeping:
-            if self.porcupine.process(pcm) == 0:
+            self.porcupine_profiler.tick()
+            wake_word_detected = self.porcupine.process(pcm) == 0
+            self.porcupine_profiler.tock(pcm)
+            if wake_word_detected:
                 self.sleeping = False
                 self.tick_count = 4
                 self.generator.interrupt()
+                if self.config['profile']:
+                    print(f'[Porcupine RTF: {round(self.porcupine_profiler.rtf(), 2)}]')
+                self.porcupine_profiler.reset()
+                self.cheetah_profiler.reset()
         elif self.listening:
+            self.cheetah_profiler.tick()
             partial_transcript, endpoint_reached = self.cheetah.process(pcm)
+            self.cheetah_profiler.tock(pcm)
             if len(partial_transcript) > 0:
                 self.user_request += partial_transcript
                 print(partial_transcript, end='', flush=True)
             if endpoint_reached:
+                utterance_end_sec = time.perf_counter()
                 self.sleeping = True
                 self.listening = False
+                self.cheetah_profiler.tick()
                 remaining_transcript = self.cheetah.flush()
+                self.cheetah_profiler.tock()
                 if
len(remaining_transcript) > 0: self.user_request += remaining_transcript print(remaining_transcript, flush=True) - self.generator.process(self.user_request) + if self.config['profile']: + print(f'[Cheetah RTF: {round(self.cheetah_profiler.rtf(), 2)}]') + self.generator.process(self.user_request, utterance_end_sec) self.user_request = '' elif self.tick_count > 0: self.tick_count -= 1 @@ -482,18 +546,28 @@ def handler(_, __) -> None: sensitivities=[config['porcupine_sensitivity']]) config['ppn_prompt'] = 'the wake word' + print(f"→ Porcupine v{porcupine.version}") + cheetah = pvcheetah.create( access_key=config['access_key'], endpoint_duration_sec=config['cheetah_endpoint_duration_sec'], enable_automatic_punctuation=True) + + print(f"→ Cheetah v{cheetah.version}") pv_recorder = PvRecorder(frame_length=porcupine.frame_length) pv_speaker = PvSpeaker(sample_rate=int(orca_connection.recv()), bits_per_sample=16, buffer_size_secs=1) - speaker = Speaker(pv_speaker, config['orca_warmup_sec']) - synthesizer = Synthesizer(speaker, orca_connection, orca_process) - generator = Generator(synthesizer, pllm_connection, pllm_process) - listener = Listener(generator, porcupine, cheetah) + pllm_info = pllm_connection.recv() + print(f"→ picoLLM v{pllm_info['version']} <{pllm_info['model']}>") + + orca_info = orca_connection.recv() + print(f"→ Orca v{orca_info['version']}") + + speaker = Speaker(pv_speaker, config) + synthesizer = Synthesizer(speaker, orca_connection, orca_process, config) + generator = Generator(synthesizer, pllm_connection, pllm_process, config) + listener = Listener(generator, porcupine, cheetah, config) recorder = Recorder(listener, pv_recorder) ppn_prompt = config['ppn_prompt'] diff --git a/recipes/llm-voice-assistant/python/cli/requirements.txt b/recipes/llm-voice-assistant/python/cli/requirements.txt new file mode 100644 index 0000000..f0cf97c --- /dev/null +++ b/recipes/llm-voice-assistant/python/cli/requirements.txt @@ -0,0 +1,6 @@ +picollm==1.2.3 +pvcheetah==2.0.1 +pvorca==1.0.0 +pvporcupine==3.0.2 +pvrecorder==1.2.2 +pvspeaker==1.0.3 \ No newline at end of file diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md new file mode 100644 index 0000000..85bf491 --- /dev/null +++ b/recipes/llm-voice-assistant/python/windows_gui/README.md @@ -0,0 +1,45 @@ +## Compatibility + +- Python 3.8+ +- Runs on Windows (x86_64). + +## AccessKey + +AccessKey is your authentication and authorization token for deploying Picovoice SDKs, including picoLLM. Anyone who is +using Picovoice needs to have a valid AccessKey. You must keep your AccessKey secret. You would need internet +connectivity to validate your AccessKey with Picovoice license servers even though the LLM inference is running 100% +offline and completely free for open-weight models. Everyone who signs up for +[Picovoice Console](https://console.picovoice.ai/) receives a unique AccessKey. + +## picoLLM Model + +picoLLM Inference Engine supports many open-weight models. The models are on +[Picovoice Console](https://console.picovoice.ai/). + +## Usage + +Install the required packages: + +```console +pip install -r requirements.txt +``` + +Run the demo: + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} +``` + +Replace `${ACCESS_KEY}` with yours obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path to the +model downloaded from Picovoice Console. 
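Optional flags are appended the same way; for instance, passing a custom wake word model (covered in the Custom Wake Word section below) might look like the following, where `${KEYWORD_MODEL_PATH}` is a hypothetical placeholder for the path to a trained `.ppn` file:

```console
python3 main.py \
    --access_key ${ACCESS_KEY} \
    --picollm_model_path ${PICOLLM_MODEL_PATH} \
    --keyword_model_path ${KEYWORD_MODEL_PATH}
```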
+
+To see all available options, type the following:
+
+```console
+python main.py --help
+```
+
+## Custom Wake Word
+
+The demo's default wake phrase is `Jarvis`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
+application using the `--keyword_model_path` argument.
\ No newline at end of file
diff --git a/recipes/llm-voice-assistant/python/windows_gui.py b/recipes/llm-voice-assistant/python/windows_gui/main.py
similarity index 100%
rename from recipes/llm-voice-assistant/python/windows_gui.py
rename to recipes/llm-voice-assistant/python/windows_gui/main.py
diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt
similarity index 100%
rename from recipes/llm-voice-assistant/python/requirements.txt
rename to recipes/llm-voice-assistant/python/windows_gui/requirements.txt

From fb8848d15bfc456f11f6f6456adda117c07b0585 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Mon, 30 Dec 2024 12:50:21 -0800
Subject: [PATCH 6/9] fixed python codestyle

---
 recipes/llm-voice-assistant/python/cli/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py
index 50bb1c8..b8609fc 100644
--- a/recipes/llm-voice-assistant/python/cli/main.py
+++ b/recipes/llm-voice-assistant/python/cli/main.py
@@ -523,6 +523,7 @@ def tick(self):
         pcm = self.recorder.read()
         self.listener.process(pcm)
 
+
 def main(config):
     stop = [False]
@@ -552,7 +553,7 @@ def handler(_, __) -> None:
         access_key=config['access_key'],
         endpoint_duration_sec=config['cheetah_endpoint_duration_sec'],
         enable_automatic_punctuation=True)
-    
+
     print(f"→ Cheetah v{cheetah.version}")
 
     pv_recorder = PvRecorder(frame_length=porcupine.frame_length)

From 70627317303c6be49851187ebee4c127683a7205 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Thu, 2 Jan 2025 11:05:50 -0800
Subject: [PATCH 7/9] added newline

---
 recipes/llm-voice-assistant/python/cli/main.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py
index b8609fc..4be536e 100644
--- a/recipes/llm-voice-assistant/python/cli/main.py
+++ b/recipes/llm-voice-assistant/python/cli/main.py
@@ -499,7 +499,7 @@ def process(self, pcm: Optional[Sequence[int]]):
                 self.tick_count -= 1
             else:
                 self.listening = True
-                print('$ Wake word detected, utter your request or question ...', flush=True)
+                print('\n$ Wake word detected, utter your request or question ...', flush=True)
                 print('User > ', end='', flush=True)
@@ -601,10 +601,6 @@ def handler(_, __) -> None:
 
 if __name__ == '__main__':
-    if not sys.platform.lower().startswith('win'):
-        print('Error: Only runs on Windows platforms')
-        exit(1)
-
     parser = ArgumentParser()
     parser.add_argument(
         '--config',

From 7faf7d92784a1be92c135308e032c88487febbb5 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Thu, 2 Jan 2025 12:50:30 -0800
Subject: [PATCH 8/9] updated readme

---
 recipes/llm-voice-assistant/python/cli/README.md | 15 +++++++++++++++
 .../python/cli/requirements.txt | 2 +-
 .../python/windows_gui/README.md | 15 +++++++++++++++
 .../python/windows_gui/requirements.txt | 2 +-
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/cli/README.md
b/recipes/llm-voice-assistant/python/cli/README.md
index 3bd2cb1..f763480 100644
--- a/recipes/llm-voice-assistant/python/cli/README.md
+++ b/recipes/llm-voice-assistant/python/cli/README.md
@@ -1,3 +1,7 @@
+# LLM Voice Assistant CLI Demo
+
+A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a text based interface.
+
 ## See It In Action!
 
 [![LLM VA in Action](https://img.youtube.com/vi/06K_YtUr8mc/0.jpg)](https://www.youtube.com/watch?v=06K_YtUr8mc)
@@ -43,6 +47,17 @@ To see all available options, type the following:
 python main.py --help
 ```
 
+## Config File
+
+In addition to command line arguments, a config file can be used to pass arguments to the demo. By default, the demo looks for `config.json` in the same directory as `main.py`, but an alternative path can be passed using the `--config` option. Below is an example config file.
+
+```json
+{
+  "access_key": "${ACCESS_KEY}",
+  "picollm_model_path": "${PICOLLM_MODEL_PATH}"
+}
+```
+
 ## Custom Wake Word
 
 The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
diff --git a/recipes/llm-voice-assistant/python/cli/requirements.txt b/recipes/llm-voice-assistant/python/cli/requirements.txt
index f0cf97c..5f73eac 100644
--- a/recipes/llm-voice-assistant/python/cli/requirements.txt
+++ b/recipes/llm-voice-assistant/python/cli/requirements.txt
@@ -1,5 +1,5 @@
 picollm==1.2.3
-pvcheetah==2.0.1
+pvcheetah==2.1.0
 pvorca==1.0.0
 pvporcupine==3.0.2
 pvrecorder==1.2.2
diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md
index 85bf491..a3ae570 100644
--- a/recipes/llm-voice-assistant/python/windows_gui/README.md
+++ b/recipes/llm-voice-assistant/python/windows_gui/README.md
@@ -1,3 +1,7 @@
+# LLM Voice Assistant GUI Demo
+
+A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a console based graphical interface.
+
 ## Compatibility
 
 - Python 3.8+
@@ -39,6 +43,17 @@ To see all available options, type the following:
 python main.py --help
 ```
 
+## Config File
+
+In addition to command line arguments, a config file can be used to pass arguments to the demo. By default, the demo looks for `config.json` in the same directory as `main.py`, but an alternative path can be passed using the `--config` option. Below is an example config file.
+
+```json
+{
+  "access_key": "${ACCESS_KEY}",
+  "picollm_model_path": "${PICOLLM_MODEL_PATH}"
+}
+```
+
 ## Custom Wake Word
 
 The demo's default wake phrase is `Jarvis`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/).
Once you have the model trained, simply pass it to the demo diff --git a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt index 3c73f69..2de6e31 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt +++ b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt @@ -1,5 +1,5 @@ picollm==1.2.3 -pvcheetah==2.0.1 +pvcheetah==2.1.0 pvorca==1.0.0 pvporcupine==3.0.2 pvrecorder==1.2.2 From 54b0586a9cb6d8160e0098b536c2d44812c75890 Mon Sep 17 00:00:00 2001 From: matt200-ok Date: Thu, 2 Jan 2025 13:52:18 -0800 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Ian Lavery --- recipes/llm-voice-assistant/python/cli/README.md | 4 ++-- recipes/llm-voice-assistant/python/windows_gui/README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/llm-voice-assistant/python/cli/README.md b/recipes/llm-voice-assistant/python/cli/README.md index f763480..7c64a06 100644 --- a/recipes/llm-voice-assistant/python/cli/README.md +++ b/recipes/llm-voice-assistant/python/cli/README.md @@ -1,6 +1,6 @@ -# LLM Voice Assistant CLI Demo +# Cross-Platform LLM Voice Assistant CLI Demo -A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a text based interface. +A cross-platform voice assistant using Picovoice's Wake Word, STT, TTS and LLM technology with a text-based interface. ## See It In Action! diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md index a3ae570..d01bc46 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/README.md +++ b/recipes/llm-voice-assistant/python/windows_gui/README.md @@ -1,6 +1,6 @@ -# LLM Voice Assistant GUI Demo +# Windows LLM Voice Assistant GUI Demo -A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a console based graphical interface. +A voice assistant for Windows using Picovoice's Wake Word, STT, TTS and LLM technology with a console-based graphical interface. ## Compatibility