From 3c00157d8bcda1b3bd92a49888f453509c7b0c0d Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 29 Nov 2024 09:30:42 -0800 Subject: [PATCH 1/9] changed interrupt to work after picollm has finished generating --- recipes/llm-voice-assistant/python/main.py | 644 ++++++++++++--------- 1 file changed, 378 insertions(+), 266 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 054669a..ccb1df5 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -3,15 +3,8 @@ import time from argparse import ArgumentParser from collections import deque -from itertools import chain -from multiprocessing import ( - Pipe, - Process, -) -from typing import ( - Optional, - Sequence, -) +from multiprocessing import Process, Queue +from typing import Optional, Sequence import picollm import pvcheetah @@ -21,6 +14,18 @@ from pvspeaker import PvSpeaker +class Commands: + CLOSE = 'close' + START = 'start' + INTERRUPT = 'interrupt' + TEXT = 'text' + GENERATE = 'generate' + SYNTHESIZE_START = 'synthesize-start' + SYNTHESIZE = 'synthesize' + SYNTHESIZE_FLUSH = 'synthesize-flush' + PROFILE = 'profile' + + class RTFProfiler: def __init__(self, sample_rate: int) -> None: self._sample_rate = sample_rate @@ -67,6 +72,11 @@ def __init__(self, stop_phrases: list) -> None: self.text: str = '' self.new_tokens: str = '' + def reset(self): + self.start: int = 0 + self.text: str = '' + self.new_tokens: str = '' + def append(self, text: str) -> None: self.text += text end = len(self.text) @@ -91,109 +101,276 @@ def get_new_tokens(self) -> str: return self.new_tokens -def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec: int = 0.03) -> None: - orca = pvorca.create(access_key=access_key) - orca_stream = orca.stream_open() +def listen_worker(main_queue, listen_queue, access_key, keyword_model_path, cheetah_endpoint_duration_sec): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) - texts = list() - pcm_deque = deque() - warmup = [False] - synthesize = False - flush = False - close = False - interrupt = False - utterance_end_sec = 0. - delay_sec = [-1.] 
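+    # Every worker installs the same SIGINT handler: instead of dying on Ctrl+C
+    # it posts a CLOSE command to the shared main queue, so the main loop can
+    # shut all processes down in one place.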
+ signal.signal(signal.SIGINT, handler) - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + if keyword_model_path is None: + porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) + else: + porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) + porcupine_profiler = RTFProfiler(porcupine.sample_rate) - connection.send({'version': orca.version}) + main_queue.put({'command': 'init', 'name': 'Porcupine', 'version': porcupine.version}) - orca_profiler = RTFProfiler(orca.sample_rate) + cheetah = pvcheetah.create( + access_key=access_key, + endpoint_duration_sec=cheetah_endpoint_duration_sec, + enable_automatic_punctuation=True) + cheetah_profiler = RTFProfiler(cheetah.sample_rate) - def buffer_pcm(pcm_chunk: Optional[Sequence[int]]) -> None: - if pcm_chunk is not None: - if delay_sec[0] == -1: - delay_sec[0] = time.perf_counter() - utterance_end_sec + main_queue.put({'command': 'init', 'name': 'Cheetah', 'version': cheetah.version}) - pcm_deque.append(pcm_chunk) + mic = PvRecorder(frame_length=porcupine.frame_length) + mic.start() + + main_queue.put({'command': 'init', 'name': 'PvRecorder', 'version': mic.version}) + + while listen_queue.empty(): + time.sleep(0.01) + listen_queue.get() - def play_buffered_pcm() -> None: - if warmup[0]: - if len(list(chain.from_iterable(pcm_deque))) < int(warmup_sec * orca.sample_rate): - return + try: + close = False + listening = False + user_request = '' + while not close: + if listen_queue.empty(): + time.sleep(0.01) + + while not listen_queue.empty(): + message = listen_queue.get() + if message['command'] == Commands.CLOSE: + close = True + + pcm = mic.read() + if not listening: + porcupine_profiler.tick() + wake_word_detected = porcupine.process(pcm) == 0 + porcupine_profiler.tock(pcm) + if wake_word_detected: + listening = True + main_queue.put({'command': Commands.PROFILE, 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) + main_queue.put({'command': Commands.INTERRUPT}) else: - warmup[0] = False - - if len(pcm_deque) > 0: - pcm_chunk = list(chain.from_iterable(pcm_deque)) - pcm_deque.clear() - - written = speaker.write(pcm_chunk) - if written < len(pcm_chunk): - pcm_deque.appendleft(pcm_chunk[written:]) - - while True: - if synthesize and len(texts) > 0: - orca_profiler.tick() - pcm = orca_stream.synthesize(texts.pop(0)) - orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - elif flush: - while len(texts) > 0: + cheetah_profiler.tick() + partial_transcript, endpoint_reached = cheetah.process(pcm) + cheetah_profiler.tock(pcm) + user_request += partial_transcript + main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) + if endpoint_reached: + utterance_end_sec = time.perf_counter() + cheetah_profiler.tick() + remaining_transcript = cheetah.flush() + cheetah_profiler.tock(pcm) + user_request += remaining_transcript + main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) + main_queue.put({'command': Commands.GENERATE, 'text': user_request, 'utterance_end_sec': utterance_end_sec}) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) + user_request = '' + listening = False + finally: + porcupine.delete() + cheetah.delete() + mic.delete() + + +def generate_worker(main_queue, generate_queue, access_key, picollm_model_path, picollm_device, picollm_completion_token_limit, picollm_presence_penalty, picollm_frequency_penalty, picollm_temperature, 
picollm_top_p, short_answers): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) + + signal.signal(signal.SIGINT, handler) + + pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) + pllm_profiler = TPSProfiler() + dialog = pllm.get_dialog() + generating = False + + main_queue.put({'command': 'init', 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) + + stop_phrases = { + '', # Llama-2, Mistral, and Mixtral + '', # Gemma + '<|endoftext|>', # Phi-2 + '<|eot_id|>', # Llama-3 + '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 + } + + completion = CompletionText(stop_phrases) + + def llm_callback(text: str): + pllm_profiler.tock() + completion.append(text) + new_tokens = completion.get_new_tokens() + if len(new_tokens) > 0 and generating: + main_queue.put({'command': Commands.SYNTHESIZE, 'text': new_tokens}) + + def llm_task(user_request, utterance_end_sec): + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" + dialog.add_human_request( + f"{short_answers_instruction}. {user_request}" if short_answers else user_request) + + main_queue.put({'command': Commands.SYNTHESIZE_START, 'utterance_end_sec': utterance_end_sec}) + + res = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=picollm_completion_token_limit, + stop_phrases=stop_phrases, + presence_penalty=picollm_presence_penalty, + frequency_penalty=picollm_frequency_penalty, + temperature=picollm_temperature, + top_p=picollm_top_p, + stream_callback=llm_callback) + + dialog.add_llm_response(res.completion) + + if res.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: + main_queue.put({'command': Commands.SYNTHESIZE_FLUSH}) + + main_queue.put({'command': Commands.PROFILE, 'text': f"[picoLLM TPS: {pllm_profiler.tps():.2f}]"}) + + return res + + executor = concurrent.futures.ThreadPoolExecutor() + + while generate_queue.empty(): + time.sleep(0.01) + generate_queue.get() + + try: + close = False + llm_future = None + while not close: + if generate_queue.empty(): + time.sleep(0.01) + + while not generate_queue.empty(): + message = generate_queue.get() + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == Commands.GENERATE: + generating = True + completion.reset() + llm_future = executor.submit( + llm_task, + message['text'], + message['utterance_end_sec']) + elif message['command'] == Commands.INTERRUPT and generating: + generating = False + pllm.interrupt() + + if llm_future and llm_future.done(): + llm_future = None + generating = False + finally: + while llm_future and not llm_future.done(): + time.sleep(0.01) + + executor.shutdown(True) + pllm.release() + + +def speak_worker(main_queue, speak_queue, access_key, warmup_sec): + def handler(_, __) -> None: + main_queue.put({'command': Commands.CLOSE}) + + signal.signal(signal.SIGINT, handler) + + orca = pvorca.create(access_key=access_key) + orca_stream = orca.stream_open() + orca_profiler = RTFProfiler(orca.sample_rate) + warmup_size = int(warmup_sec * orca.sample_rate) + + main_queue.put({'command': 'init', 'name': 'Orca', 'version': orca.version}) + + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + + main_queue.put({'command': 'init', 'name': 'PvSpeaker', 'version': speaker.version}) + + while speak_queue.empty(): + time.sleep(0.01) + speak_queue.get() + + try: + close = False + synthesizing = False + speaking = False + flush = False + text_queue = deque() + 
pcm_queue = list() + delay_sec = -1 + utterance_end_sec = 0 + while not close: + if speak_queue.empty(): + time.sleep(0.01) + + while not speak_queue.empty(): + message = speak_queue.get() + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == Commands.SYNTHESIZE_START: + synthesizing = True + utterance_end_sec = message['utterance_end_sec'] + delay_sec = -1 + elif message['command'] == Commands.SYNTHESIZE: + text_queue.append(message['text'].replace('\n', ' . ')) + elif message['command'] == Commands.INTERRUPT: + if synthesizing: + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + if speaking: + speaker.stop() + text_queue.clear() + pcm_queue.clear() + synthesizing = False + speaking = False + flush = False + elif message['command'] == Commands.SYNTHESIZE_FLUSH: + flush = True + + while len(text_queue) > 0: + text = text_queue.popleft() orca_profiler.tick() - pcm = orca_stream.synthesize(texts.pop(0)) + pcm = orca_stream.synthesize(text) orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - buffer_pcm(pcm) - play_buffered_pcm() - connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) - flush = False - speaker.flush(list(chain.from_iterable(pcm_deque))) - pcm_deque.clear() - speaker.stop() - delay_sec[0] = -1 - connection.send({'done': True}) - elif close: - break - elif interrupt: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) - interrupt = False - pcm_deque.clear() - speaker.stop() - delay_sec[0] = -1 - connection.send({'done': True}) - else: - time.sleep(stream_frame_sec) - - while connection.poll(): - message = connection.recv() - if message['command'] == 'synthesize': - texts.append(message['text']) - if not speaker.is_started: - speaker.start() - warmup[0] = True - utterance_end_sec = message['utterance_end_sec'] - synthesize = True - elif message['command'] == 'flush': - synthesize = False - flush = True - elif message['command'] == 'close': - close = True - elif message['command'] == 'interrupt': - interrupt = True + if pcm is not None: + if delay_sec == -1: + delay_sec = time.perf_counter() - utterance_end_sec + pcm_queue.extend(pcm) - speaker.delete() - orca_stream.close() - orca.delete() + if flush and synthesizing: + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + synthesizing = False + if pcm is not None: + pcm_queue.extend(pcm) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + + if not speaking and len(pcm_queue) > warmup_size: + speaker.start() + speaking = True + + if speaking and len(pcm_queue) > 0: + written = speaker.write(pcm_queue) + if written > 0: + del pcm_queue[:written] + + if speaking and flush and len(pcm_queue) == 0: + speaker.flush(pcm_queue) + speaker.stop() + speaking = False + flush = False + main_queue.put({'command': Commands.START}) + finally: + orca_stream.close() + orca.delete() + speaker.delete() def main() -> None: @@ -279,180 +456,115 @@ def main() -> None: profile = args.profile short_answers = args.short_answers - if keyword_model_path is None: - porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) - else: - porcupine = 
pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - print(f"→ Porcupine v{porcupine.version}") - - cheetah = pvcheetah.create( - access_key=access_key, - endpoint_duration_sec=cheetah_endpoint_duration_sec, - enable_automatic_punctuation=True) - print(f"→ Cheetah v{cheetah.version}") - - pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) - dialog = pllm.get_dialog() - print(f"→ picoLLM v{pllm.version} <{pllm.model}>") - - main_connection, orca_process_connection = Pipe() - orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) - orca_process.start() - while not main_connection.poll(): - time.sleep(0.01) - print(f"→ Orca v{main_connection.recv()['version']}") - - mic = PvRecorder(frame_length=porcupine.frame_length) - mic.start() - - print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - - stop = [False] + main_queue = Queue() + listen_queue = Queue() + generate_queue = Queue() + speak_queue = Queue() + + listen_process = Process(target=listen_worker, args=( + main_queue, + listen_queue, + access_key, + keyword_model_path, + cheetah_endpoint_duration_sec + )) + generate_process = Process(target=generate_worker, args=( + main_queue, + generate_queue, + access_key, + picollm_model_path, + picollm_device, + picollm_completion_token_limit, + picollm_presence_penalty, + picollm_frequency_penalty, + picollm_temperature, + picollm_top_p, + short_answers + )) + speak_process = Process(target=speak_worker, args=( + main_queue, + speak_queue, + access_key, + orca_warmup_sec + )) def handler(_, __) -> None: - stop[0] = True + main_queue.put({'command': Commands.CLOSE}) signal.signal(signal.SIGINT, handler) - def llm_task(dialog, user_request, utterance_end_sec, main_connection): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. {user_request}" if short_answers else user_request) - - picollm_profiler = TPSProfiler() - - stop_phrases = { - '', # Llama-2, Mistral, and Mixtral - '', # Gemma - '<|endoftext|>', # Phi-2 - '<|eot_id|>', # Llama-3 - '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 - } - - completion = CompletionText(stop_phrases) - - def llm_callback(text: str) -> None: - picollm_profiler.tock() - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0: - main_connection.send({ - 'command': 'synthesize', - 'text': new_tokens.replace('\n', ' . 
'), - 'utterance_end_sec': utterance_end_sec}) - print(f'{new_tokens}', end='', flush=True) - - print( - f"\nLLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", - end='', - flush=True) - res = pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=picollm_completion_token_limit, - stop_phrases=stop_phrases, - presence_penalty=picollm_presence_penalty, - frequency_penalty=picollm_frequency_penalty, - temperature=picollm_temperature, - top_p=picollm_top_p, - stream_callback=llm_callback) - - if res.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - main_connection.send({'command': 'interrupt'}) - else: - main_connection.send({'command': 'flush'}) - - print('\n') - dialog.add_llm_response(res.completion) + generate_process.start() + listen_process.start() + speak_process.start() - if profile: - print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") - - while not main_connection.poll(): - time.sleep(0.01) - message = main_connection.recv() - if profile: - print(f"[Orca RTF: {message['rtf']:.2f}]") - print(f"[Delay: {message['delay']:.2f} sec]") - while not main_connection.poll(): - time.sleep(0.01) - assert main_connection.recv()['done'] - - return res - - wake_word_detected = False - user_request = '' - endpoint_reached = False - - porcupine_profiler = RTFProfiler(porcupine.sample_rate) - cheetah_profiler = RTFProfiler(cheetah.sample_rate) + modules = [ + 'Porcupine', + 'Cheetah', + 'PvRecorder', + 'picoLLM', + 'Orca', + 'PvSpeaker' + ] try: - while True: - if stop[0]: - break - elif not wake_word_detected: - pcm = mic.read() - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - if profile: - print(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") - print("$ Wake word detected, utter your request or question ...\n") - print("User > ", end='', flush=True) - elif not endpoint_reached: - pcm = mic.read() - cheetah_profiler.tick() - partial_transcript, endpoint_reached = cheetah.process(pcm) - cheetah_profiler.tock(pcm) - print(partial_transcript, end='', flush=True) - user_request += partial_transcript - if endpoint_reached: - utterance_end_sec = time.perf_counter() - cheetah_profiler.tick() - remaining_transcript = cheetah.flush() - cheetah_profiler.tock() - user_request += remaining_transcript - print(remaining_transcript, end='\n') - if profile: - print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") - with concurrent.futures.ThreadPoolExecutor() as executor: - llm_future = executor.submit( - llm_task, - dialog, - user_request, - utterance_end_sec, - main_connection) - - while not llm_future.done(): - pcm = mic.read() - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - pllm.interrupt() - break - - llm_result = llm_future.result() - if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - wake_word_detected = True - print("$ Wake word detected, utter your request or question ...\n") - print("User > ", end='', flush=True) - else: - wake_word_detected = False - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - user_request = '' - endpoint_reached = False - + close = False + generating = False + while not close: + while main_queue.empty(): + time.sleep(0.01) + + message = main_queue.get(block=True) + if message['command'] == Commands.CLOSE: + close = True + elif message['command'] == 'init': + print(f"→ {message['name']} 
v{message['version']}") + modules.remove(message['name']) + if len(modules) == 0: + main_queue.put({'command': Commands.START}) + listen_queue.put({'command': Commands.START}) + generate_queue.put({'command': Commands.START}) + speak_queue.put({'command': Commands.START}) + elif message['command'] == Commands.START: + print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") + elif message['command'] == Commands.INTERRUPT: + if generating: + print() + generating = False + print("$ Wake word detected, utter your request or question ...") + print("User > ", end='', flush=True) + generate_queue.put(message) + speak_queue.put(message) + elif message['command'] == Commands.TEXT: + print(message['text'], end='', flush=True) + elif message['command'] == Commands.GENERATE: + print() + generate_queue.put(message) + elif message['command'] == Commands.SYNTHESIZE_START: + print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) + speak_queue.put(message) + generating = True + elif message['command'] == Commands.SYNTHESIZE: + print(message['text'], end='', flush=True) + speak_queue.put(message) + elif message['command'] == Commands.SYNTHESIZE_FLUSH: + print() + speak_queue.put(message) + generating = False + elif message['command'] == Commands.PROFILE: + if profile: + print(message['text']) finally: - main_connection.send({'command': 'close'}) - mic.delete() - pllm.release() - cheetah.delete() - porcupine.delete() - orca_process.join() + generate_queue.put({'command': Commands.INTERRUPT}) + speak_queue.put({'command': Commands.INTERRUPT}) + + listen_queue.put({'command': Commands.CLOSE}) + generate_queue.put({'command': Commands.CLOSE}) + speak_queue.put({'command': Commands.CLOSE}) + + listen_process.join() + generate_process.join() + speak_process.join() if __name__ == '__main__': - main() + main() \ No newline at end of file From e5d66174b8403d3c9c5568c4ca3e65916b48a7f7 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 5 Dec 2024 11:25:37 -0800 Subject: [PATCH 2/9] Improved state management --- recipes/llm-voice-assistant/python/main.py | 38 +++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ccb1df5..d059be2 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -15,6 +15,7 @@ class Commands: + INIT = 'init' CLOSE = 'close' START = 'start' INTERRUPT = 'interrupt' @@ -41,7 +42,10 @@ def tock(self, audio: Optional[Sequence[int]] = None) -> None: self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0. def rtf(self) -> float: - rtf = self._compute_sec / self._audio_sec + if self._audio_sec > 0: + rtf = self._compute_sec / self._audio_sec + else: + rtf = 0 self._compute_sec = 0. self._audio_sec = 0. 
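+        # Guarding the division above keeps rtf() safe to call before any audio
+        # has been profiled; the accumulators are then reset for the next interval.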
return rtf @@ -113,7 +117,7 @@ def handler(_, __) -> None: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) porcupine_profiler = RTFProfiler(porcupine.sample_rate) - main_queue.put({'command': 'init', 'name': 'Porcupine', 'version': porcupine.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Porcupine', 'version': porcupine.version}) cheetah = pvcheetah.create( access_key=access_key, @@ -121,16 +125,15 @@ def handler(_, __) -> None: enable_automatic_punctuation=True) cheetah_profiler = RTFProfiler(cheetah.sample_rate) - main_queue.put({'command': 'init', 'name': 'Cheetah', 'version': cheetah.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Cheetah', 'version': cheetah.version}) mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - main_queue.put({'command': 'init', 'name': 'PvRecorder', 'version': mic.version}) + main_queue.put({'command': Commands.INIT, 'name': 'PvRecorder', 'version': mic.version}) while listen_queue.empty(): time.sleep(0.01) - listen_queue.get() try: close = False @@ -158,8 +161,9 @@ def handler(_, __) -> None: cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) - user_request += partial_transcript - main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) + if len(partial_transcript) > 0: + user_request += partial_transcript + main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) if endpoint_reached: utterance_end_sec = time.perf_counter() cheetah_profiler.tick() @@ -188,7 +192,7 @@ def handler(_, __) -> None: dialog = pllm.get_dialog() generating = False - main_queue.put({'command': 'init', 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) + main_queue.put({'command': Commands.INIT, 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) stop_phrases = { '', # Llama-2, Mistral, and Mixtral @@ -238,7 +242,6 @@ def llm_task(user_request, utterance_end_sec): while generate_queue.empty(): time.sleep(0.01) - generate_queue.get() try: close = False @@ -284,15 +287,14 @@ def handler(_, __) -> None: orca_profiler = RTFProfiler(orca.sample_rate) warmup_size = int(warmup_sec * orca.sample_rate) - main_queue.put({'command': 'init', 'name': 'Orca', 'version': orca.version}) + main_queue.put({'command': Commands.INIT, 'name': 'Orca', 'version': orca.version}) - speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=20) + speaker = PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=1) - main_queue.put({'command': 'init', 'name': 'PvSpeaker', 'version': speaker.version}) + main_queue.put({'command': Commands.INIT, 'name': 'PvSpeaker', 'version': speaker.version}) while speak_queue.empty(): time.sleep(0.01) - speak_queue.get() try: close = False @@ -350,7 +352,7 @@ def handler(_, __) -> None: synthesizing = False if pcm is not None: pcm_queue.extend(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if not speaking and len(pcm_queue) > warmup_size: speaker.start() @@ -508,6 +510,7 @@ def handler(_, __) -> None: try: close = False + listening = False generating = False while not close: while main_queue.empty(): @@ -516,7 +519,7 @@ def handler(_, __) -> None: message = main_queue.get(block=True) if message['command'] == 
Commands.CLOSE: close = True - elif message['command'] == 'init': + elif message['command'] == Commands.INIT: print(f"→ {message['name']} v{message['version']}") modules.remove(message['name']) if len(modules) == 0: @@ -525,7 +528,8 @@ def handler(_, __) -> None: generate_queue.put({'command': Commands.START}) speak_queue.put({'command': Commands.START}) elif message['command'] == Commands.START: - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") + if not listening: + print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") elif message['command'] == Commands.INTERRUPT: if generating: print() @@ -534,11 +538,13 @@ def handler(_, __) -> None: print("User > ", end='', flush=True) generate_queue.put(message) speak_queue.put(message) + listening = True elif message['command'] == Commands.TEXT: print(message['text'], end='', flush=True) elif message['command'] == Commands.GENERATE: print() generate_queue.put(message) + listening = False elif message['command'] == Commands.SYNTHESIZE_START: print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) speak_queue.put(message) From c05a53a31eb0121d5fb0fda43ff20b3edeab4da5 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 5 Dec 2024 11:33:35 -0800 Subject: [PATCH 3/9] fixed codestyle and spellcheck --- recipes/llm-voice-assistant/python/main.py | 39 +++++++++++++++++----- res/.lint/spell-check/dict.txt | 1 + 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index d059be2..ae71166 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -155,7 +155,9 @@ def handler(_, __) -> None: porcupine_profiler.tock(pcm) if wake_word_detected: listening = True - main_queue.put({'command': Commands.PROFILE, 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) main_queue.put({'command': Commands.INTERRUPT}) else: cheetah_profiler.tick() @@ -171,8 +173,13 @@ def handler(_, __) -> None: cheetah_profiler.tock(pcm) user_request += remaining_transcript main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) - main_queue.put({'command': Commands.GENERATE, 'text': user_request, 'utterance_end_sec': utterance_end_sec}) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) + main_queue.put({ + 'command': Commands.GENERATE, + 'text': user_request, + 'utterance_end_sec': utterance_end_sec}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) user_request = '' listening = False finally: @@ -181,7 +188,18 @@ def handler(_, __) -> None: mic.delete() -def generate_worker(main_queue, generate_queue, access_key, picollm_model_path, picollm_device, picollm_completion_token_limit, picollm_presence_penalty, picollm_frequency_penalty, picollm_temperature, picollm_top_p, short_answers): +def generate_worker( + main_queue, + generate_queue, + access_key, + picollm_model_path, + picollm_device, + picollm_completion_token_limit, + picollm_presence_penalty, + picollm_frequency_penalty, + picollm_temperature, + picollm_top_p, + short_answers): def handler(_, __) -> None: main_queue.put({'command': Commands.CLOSE}) @@ -324,7 +342,9 @@ def handler(_, __) -> None: 
orca_profiler.tick() pcm = orca_stream.flush() orca_profiler.tock(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if speaking: speaker.stop() text_queue.clear() @@ -352,7 +372,9 @@ def handler(_, __) -> None: synthesizing = False if pcm is not None: pcm_queue.extend(pcm) - main_queue.put({'command': Commands.PROFILE, 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) + main_queue.put({ + 'command': Commands.PROFILE, + 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) if not speaking and len(pcm_queue) > warmup_size: speaker.start() @@ -546,7 +568,8 @@ def handler(_, __) -> None: generate_queue.put(message) listening = False elif message['command'] == Commands.SYNTHESIZE_START: - print(f"LLM (say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} to interrupt) > ", end='', flush=True) + wake_word = '`Picovoice`' if keyword_model_path is None else 'the wake word' + print(f"LLM (say {wake_word} to interrupt) > ", end='', flush=True) speak_queue.put(message) generating = True elif message['command'] == Commands.SYNTHESIZE: @@ -573,4 +596,4 @@ def handler(_, __) -> None: if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index b654661..7d553f8 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -24,6 +24,7 @@ picollm picovoice pids pllm +popleft psutil pvcheetah pvorca From 4d9dc0ed5157c06df140e59253261abdd5e8b77b Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 27 Dec 2024 15:37:04 -0800 Subject: [PATCH 4/9] updated to use same code as windows_gui demo --- recipes/llm-voice-assistant/python/main.py | 894 +++++++++++---------- 1 file changed, 474 insertions(+), 420 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ae71166..2be1b65 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -1,11 +1,16 @@ +import json +import os import signal -import concurrent.futures +import sys import time from argparse import ArgumentParser -from collections import deque -from multiprocessing import Process, Queue +from concurrent.futures import ThreadPoolExecutor +from itertools import chain +from multiprocessing import Event, Pipe, Process, Queue, active_children +from multiprocessing.connection import Connection from typing import Optional, Sequence + import picollm import pvcheetah import pvorca @@ -15,16 +20,13 @@ class Commands: - INIT = 'init' - CLOSE = 'close' START = 'start' - INTERRUPT = 'interrupt' - TEXT = 'text' - GENERATE = 'generate' - SYNTHESIZE_START = 'synthesize-start' + CLOSE = 'close' + PROCESS = 'process' SYNTHESIZE = 'synthesize' - SYNTHESIZE_FLUSH = 'synthesize-flush' - PROFILE = 'profile' + SPEAK = 'speak' + FLUSH = 'flush' + INTERRUPT = 'interrupt' class RTFProfiler: @@ -105,307 +107,438 @@ def get_new_tokens(self) -> str: return self.new_tokens -def listen_worker(main_queue, listen_queue, access_key, keyword_model_path, cheetah_endpoint_duration_sec): - def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - - signal.signal(signal.SIGINT, handler) - - if keyword_model_path is None: - porcupine = 
pvporcupine.create(access_key=access_key, keywords=['picovoice']) - else: - porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - porcupine_profiler = RTFProfiler(porcupine.sample_rate) - - main_queue.put({'command': Commands.INIT, 'name': 'Porcupine', 'version': porcupine.version}) - - cheetah = pvcheetah.create( - access_key=access_key, - endpoint_duration_sec=cheetah_endpoint_duration_sec, - enable_automatic_punctuation=True) - cheetah_profiler = RTFProfiler(cheetah.sample_rate) - - main_queue.put({'command': Commands.INIT, 'name': 'Cheetah', 'version': cheetah.version}) - - mic = PvRecorder(frame_length=porcupine.frame_length) - mic.start() - - main_queue.put({'command': Commands.INIT, 'name': 'PvRecorder', 'version': mic.version}) +class Speaker: + def __init__( + self, + speaker: PvSpeaker, + orca_warmup_sec: int): + self.speaker = speaker + self.orca_warmup = self.speaker.sample_rate * orca_warmup_sec + self.started = False + self.speaking = False + self.flushing = False + self.pcmBuffer = [] + self.executor = ThreadPoolExecutor() + self.future = None + + def close(self): + self.executor.shutdown() + + def start(self): + self.started = True + + def process(self, pcm: Optional[Sequence[int]]): + if self.started and pcm is not None: + self.pcmBuffer.extend(pcm) + + def flush(self): + self.flushing = True + + def interrupt(self): + self.started = False + if self.speaking: + self.speaking = False + self.flushing = False + self.pcmBuffer.clear() + self.speaker.stop() + + def tick(self): + def stop(): + self.speaker.flush() + self.speaker.stop() + if not self.speaking and len(self.pcmBuffer) > self.orca_warmup: + self.speaking = True + self.speaker.start() + if self.speaking and len(self.pcmBuffer) > 0: + written = self.speaker.write(self.pcmBuffer) + if written > 0: + del self.pcmBuffer[:written] + elif self.speaking and self.flushing and len(self.pcmBuffer) == 0: + self.started = False + self.speaking = False + self.flushing = False + self.future = self.executor.submit(stop) + if self.future and self.future.done(): + self.future = None + ppn_prompt = config['ppn_prompt'] + print(f'$ Say {ppn_prompt} ...', flush=True) + + +class Synthesizer: + def __init__( + self, + speaker: Speaker, + orca_connection: Connection, + orca_process: Process): + self.speaker = speaker + self.orca_connection = orca_connection + self.orca_process = orca_process + + def close(self): + self.orca_connection.send({'command': Commands.CLOSE}) + self.orca_process.join() + + def start(self): + self.speaker.start() + self.orca_connection.send({'command': Commands.START}) + + def process(self, text: str): + self.orca_connection.send({'command': Commands.PROCESS, 'text': text}) + + def flush(self): + self.orca_connection.send({'command': Commands.FLUSH}) + + def interrupt(self): + self.orca_connection.send({'command': Commands.INTERRUPT}) + while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: + time.sleep(0.01) + self.speaker.interrupt() + + def tick(self): + while self.orca_connection.poll(): + message = self.orca_connection.recv() + if message['command'] == Commands.SPEAK: + self.speaker.process(message['pcm']) + elif message['command'] == Commands.FLUSH: + self.speaker.flush() + + @staticmethod + def create_worker(config): + main_connection, process_connection = Pipe() + process = Process(target=Synthesizer.worker, args=(process_connection, config)) + process.start() + return main_connection, process + + @staticmethod + def 
worker(connection: Connection, config):
+        def handler(_, __) -> None:
+            pass
+        signal.signal(signal.SIGINT, handler)
+
+        orca = pvorca.create(access_key=config['access_key'])
+        orca_stream = orca.stream_open()
+        connection.send(orca.sample_rate)
+
+        try:
+            close = False
+            synthesizing = False
+            flushing = False
+            text_queue = Queue()
+            while not close:
+                while connection.poll():
+                    message = connection.recv()
+                    if message['command'] == Commands.CLOSE:
+                        close = True
+                    elif message['command'] == Commands.START:
+                        synthesizing = True
+                    elif message['command'] == Commands.PROCESS:
+                        if synthesizing:
+                            text_queue.put(message['text'])
+                    elif message['command'] == Commands.FLUSH:
+                        flushing = True
+                    elif message['command'] == Commands.INTERRUPT:
+                        synthesizing = False
+                        flushing = False
+                        while not text_queue.empty():
+                            text_queue.get()
+                        orca_stream.flush()
+                        connection.send({'command': Commands.INTERRUPT})
+                if not text_queue.empty():
+                    text = text_queue.get()
+                    pcm = orca_stream.synthesize(text)
+                    if pcm is not None:
+                        connection.send({'command': Commands.SPEAK, 'pcm': pcm})
+                if synthesizing and flushing and text_queue.empty():
+                    synthesizing = False
+                    flushing = False
+                    pcm = orca_stream.flush()
+                    connection.send({'command': Commands.SPEAK, 'pcm': pcm})
+                    connection.send({'command': Commands.FLUSH})
+                elif flushing:
+                    flushing = False
+        finally:
+            orca_stream.close()
+            orca.delete()
+
+
+class Generator:
+    def __init__(
+            self,
+            synthesizer: Synthesizer,
+            pllm_connection: Connection,
+            pllm_process: Process):
+        self.synthesizer = synthesizer
+        self.pllm_connection = pllm_connection
+        self.pllm_process = pllm_process
+
+    def close(self):
+        self.pllm_connection.send({'command': Commands.CLOSE})
+        self.pllm_process.join()
+
+    def process(self, text: str):
+        ppn_prompt = config['ppn_prompt']
+        print(f'LLM (say {ppn_prompt} to interrupt) > ', end='', flush=True)
+
+        self.synthesizer.start()
+        self.pllm_connection.send({'command': Commands.PROCESS, 'text': text})
+
+    def interrupt(self):
+        self.pllm_connection.send({'command': Commands.INTERRUPT})
+        while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT:
+            time.sleep(0.01)
+        print('', flush=True)
+        self.synthesizer.interrupt()

-    while listen_queue.empty():
-        time.sleep(0.01)

+    def tick(self):
+        while self.pllm_connection.poll():
+            message = self.pllm_connection.recv()
+            if message['command'] == Commands.SYNTHESIZE:
+                print(message['text'], end='', flush=True)
+                self.synthesizer.process(message['text'])
+            elif message['command'] == Commands.FLUSH:
+                print('', flush=True)
+                self.synthesizer.flush()
+
+    @staticmethod
+    def create_worker(config):
+        main_connection, process_connection = Pipe()
+        process = Process(target=Generator.worker, args=(process_connection, config))
+        process.start()
+        return main_connection, process
+
+    @staticmethod
+    def worker(connection: Connection, config):
+        def handler(_, __) -> None:
+            pass
+        signal.signal(signal.SIGINT, handler)
+
+        pllm = picollm.create(
+            access_key=config['access_key'],
+            model_path=config['picollm_model_path'],
+            device=config['picollm_device'])
+        if config['picollm_system_prompt'] is not None:
+            dialog = pllm.get_dialog(system=config['picollm_system_prompt'])
+        else:
+            dialog = pllm.get_dialog()
+        generating = False

         stop_phrases = {
             '', # Llama-2, Mistral, and Mixtral
             '', # Gemma
             '<|endoftext|>', # Phi-2
             '<|eot_id|>', # Llama-3
             '<|end|>', '<|user|>', '<|assistant|>', # Phi-3
+        }
+        completion = CompletionText(stop_phrases)
+
+        def llm_callback(text):
+            if generating:
+                completion.append(text)
+                new_tokens = completion.get_new_tokens()
+                if len(new_tokens) > 0:
+                    connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens})
+
+        def llm_task(text):
+            short_answers_instruction = \
+                "You are a voice assistant and your answers are very short but informative"
+            dialog.add_human_request(
+                f"{short_answers_instruction}. {text}" if config['short_answers'] else text)
+
+            completion.reset()
+            return pllm.generate(
+                prompt=dialog.prompt(),
+                completion_token_limit=config['picollm_completion_token_limit'],
+                stop_phrases=stop_phrases,
+                presence_penalty=config['picollm_presence_penalty'],
+                frequency_penalty=config['picollm_frequency_penalty'],
+                temperature=config['picollm_temperature'],
+                top_p=config['picollm_top_p'],
+                stream_callback=llm_callback)
+
+        try:
+            close = False
+            executor = ThreadPoolExecutor()
+            llm_future = None
+            interrupting = False
+            while not close:
+                while connection.poll():
+                    message = connection.recv()
+                    if message['command'] == Commands.CLOSE:
+                        close = True
+                    elif message['command'] == Commands.PROCESS:
+                        generating = True
+                        text = message['text']
+                        llm_future = executor.submit(llm_task, text)
+                    elif message['command'] == Commands.INTERRUPT:
+                        interrupting = True
+                        generating = False
+                        pllm.interrupt()
+                if llm_future and llm_future.done():
+                    generating = False
+                    llm_result = llm_future.result()
+                    dialog.add_llm_response(llm_result.completion)
+                    if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
+                        interrupting = False
+                        connection.send({'command': Commands.INTERRUPT})
+                    else:
+                        connection.send({'command': Commands.FLUSH})
+                    llm_future = None
+                if not llm_future and interrupting:
+                    interrupting = False
+                    connection.send({'command': Commands.INTERRUPT})
+        finally:
+            # Wait for any in-flight generation to finish before releasing picoLLM.
+            while llm_future and not llm_future.done():
+                time.sleep(0.01)
+            del executor
+            pllm.release()
+
+
+class Listener:
+    def __init__(
+            self,
+            generator: Generator,
+            porcupine: pvporcupine.Porcupine,
+            cheetah: pvcheetah.Cheetah):
+        self.generator = generator
+        self.porcupine = porcupine
+        self.cheetah = cheetah
+
+        self.sleeping = True
+        self.listening = False
+        self.user_request = ''
+        self.tick_count = 0
+
+    def close(self):
+        pass
+
+    def process(self, pcm: Optional[Sequence[int]]):
+        if self.sleeping:
+            if self.porcupine.process(pcm) == 0:
+                self.sleeping = False
+                self.tick_count = 4
+                self.generator.interrupt()
+        elif self.listening:
+            partial_transcript, endpoint_reached = self.cheetah.process(pcm)
+            if len(partial_transcript) > 0:
+                self.user_request += partial_transcript
+                print(partial_transcript, end='', flush=True)
+            if endpoint_reached:
+                self.sleeping = True
+                self.listening = False
+                remaining_transcript = self.cheetah.flush()
+                if len(remaining_transcript) > 0:
+                    self.user_request += remaining_transcript
+                print(remaining_transcript, flush=True)
+                self.generator.process(self.user_request)
+                self.user_request = ''
+        elif self.tick_count > 0:
+            self.tick_count -= 1
+        else:
+            self.listening = True
+            print('$ Wake word detected, utter your request or question ...', flush=True)
+            print('User > ', end='', flush=True)
+
+
+class Recorder:
+    def __init__(
+            self,
+            listener: Listener,
+            recorder: PvRecorder):
+        self.listener = listener
+        self.recorder = recorder
+        self.recording = False
+
+    def close(self):
+        if self.recording:
+            self.recorder.stop()
+
+    def tick(self):
+        if not self.recording:
+            self.recording = 
True + self.recorder.start() + pcm = self.recorder.read() + self.listener.process(pcm) + +def main(config): + stop = [False] - while not listen_queue.empty(): - message = listen_queue.get() - if message['command'] == Commands.CLOSE: - close = True - - pcm = mic.read() - if not listening: - porcupine_profiler.tick() - wake_word_detected = porcupine.process(pcm) == 0 - porcupine_profiler.tock(pcm) - if wake_word_detected: - listening = True - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]"}) - main_queue.put({'command': Commands.INTERRUPT}) - else: - cheetah_profiler.tick() - partial_transcript, endpoint_reached = cheetah.process(pcm) - cheetah_profiler.tock(pcm) - if len(partial_transcript) > 0: - user_request += partial_transcript - main_queue.put({'command': Commands.TEXT, 'text': partial_transcript}) - if endpoint_reached: - utterance_end_sec = time.perf_counter() - cheetah_profiler.tick() - remaining_transcript = cheetah.flush() - cheetah_profiler.tock(pcm) - user_request += remaining_transcript - main_queue.put({'command': Commands.TEXT, 'text': remaining_transcript}) - main_queue.put({ - 'command': Commands.GENERATE, - 'text': user_request, - 'utterance_end_sec': utterance_end_sec}) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]"}) - user_request = '' - listening = False - finally: - porcupine.delete() - cheetah.delete() - mic.delete() - - -def generate_worker( - main_queue, - generate_queue, - access_key, - picollm_model_path, - picollm_device, - picollm_completion_token_limit, - picollm_presence_penalty, - picollm_frequency_penalty, - picollm_temperature, - picollm_top_p, - short_answers): def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - + stop[0] = True signal.signal(signal.SIGINT, handler) - pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) - pllm_profiler = TPSProfiler() - dialog = pllm.get_dialog() - generating = False - - main_queue.put({'command': Commands.INIT, 'name': 'picoLLM', 'version': f"{pllm.version} <{pllm.model}>"}) - - stop_phrases = { - '', # Llama-2, Mistral, and Mixtral - '', # Gemma - '<|endoftext|>', # Phi-2 - '<|eot_id|>', # Llama-3 - '<|end|>', '<|user|>', '<|assistant|>', # Phi-3 - } - - completion = CompletionText(stop_phrases) + pllm_connection, pllm_process = Generator.create_worker(config) + orca_connection, orca_process = Synthesizer.create_worker(config) - def llm_callback(text: str): - pllm_profiler.tock() - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0 and generating: - main_queue.put({'command': Commands.SYNTHESIZE, 'text': new_tokens}) - - def llm_task(user_request, utterance_end_sec): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. 
{user_request}" if short_answers else user_request) - - main_queue.put({'command': Commands.SYNTHESIZE_START, 'utterance_end_sec': utterance_end_sec}) - - res = pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=picollm_completion_token_limit, - stop_phrases=stop_phrases, - presence_penalty=picollm_presence_penalty, - frequency_penalty=picollm_frequency_penalty, - temperature=picollm_temperature, - top_p=picollm_top_p, - stream_callback=llm_callback) - - dialog.add_llm_response(res.completion) - - if res.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: - main_queue.put({'command': Commands.SYNTHESIZE_FLUSH}) + if 'keyword_model_path' not in config: + porcupine = pvporcupine.create( + access_key=config['access_key'], + keywords=['picovoice'], + sensitivities=[config['porcupine_sensitivity']]) + config['ppn_prompt'] = '`Picovoice`' + else: + porcupine = pvporcupine.create( + access_key=config['access_key'], + keyword_paths=[config['keyword_model_path']], + sensitivities=[config['porcupine_sensitivity']]) + config['ppn_prompt'] = 'the wake word' - main_queue.put({'command': Commands.PROFILE, 'text': f"[picoLLM TPS: {pllm_profiler.tps():.2f}]"}) + cheetah = pvcheetah.create( + access_key=config['access_key'], + endpoint_duration_sec=config['cheetah_endpoint_duration_sec'], + enable_automatic_punctuation=True) - return res + pv_recorder = PvRecorder(frame_length=porcupine.frame_length) + pv_speaker = PvSpeaker(sample_rate=int(orca_connection.recv()), bits_per_sample=16, buffer_size_secs=1) - executor = concurrent.futures.ThreadPoolExecutor() + speaker = Speaker(pv_speaker, config['orca_warmup_sec']) + synthesizer = Synthesizer(speaker, orca_connection, orca_process) + generator = Generator(synthesizer, pllm_connection, pllm_process) + listener = Listener(generator, porcupine, cheetah) + recorder = Recorder(listener, pv_recorder) - while generate_queue.empty(): - time.sleep(0.01) + ppn_prompt = config['ppn_prompt'] + print(f'$ Say {ppn_prompt} ...', flush=True) try: - close = False - llm_future = None - while not close: - if generate_queue.empty(): - time.sleep(0.01) - - while not generate_queue.empty(): - message = generate_queue.get() - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.GENERATE: - generating = True - completion.reset() - llm_future = executor.submit( - llm_task, - message['text'], - message['utterance_end_sec']) - elif message['command'] == Commands.INTERRUPT and generating: - generating = False - pllm.interrupt() - - if llm_future and llm_future.done(): - llm_future = None - generating = False + while not stop[0]: + recorder.tick() + generator.tick() + synthesizer.tick() + speaker.tick() finally: - while llm_future and not llm_future.done(): - time.sleep(0.01) - - executor.shutdown(True) - pllm.release() - - -def speak_worker(main_queue, speak_queue, access_key, warmup_sec): - def handler(_, __) -> None: - main_queue.put({'command': Commands.CLOSE}) - - signal.signal(signal.SIGINT, handler) - - orca = pvorca.create(access_key=access_key) - orca_stream = orca.stream_open() - orca_profiler = RTFProfiler(orca.sample_rate) - warmup_size = int(warmup_sec * orca.sample_rate) + generator.interrupt() + generator.tick() + synthesizer.tick() + speaker.tick() + recorder.close() + listener.close() + generator.close() + synthesizer.close() + speaker.close() + + for child in active_children(): + child.terminate() - main_queue.put({'command': Commands.INIT, 'name': 'Orca', 'version': orca.version}) - - speaker = 
PvSpeaker(sample_rate=orca.sample_rate, bits_per_sample=16, buffer_size_secs=1) - - main_queue.put({'command': Commands.INIT, 'name': 'PvSpeaker', 'version': speaker.version}) - - while speak_queue.empty(): - time.sleep(0.01) - - try: - close = False - synthesizing = False - speaking = False - flush = False - text_queue = deque() - pcm_queue = list() - delay_sec = -1 - utterance_end_sec = 0 - while not close: - if speak_queue.empty(): - time.sleep(0.01) + porcupine.delete() + cheetah.delete() + pv_recorder.delete() + pv_speaker.delete() - while not speak_queue.empty(): - message = speak_queue.get() - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.SYNTHESIZE_START: - synthesizing = True - utterance_end_sec = message['utterance_end_sec'] - delay_sec = -1 - elif message['command'] == Commands.SYNTHESIZE: - text_queue.append(message['text'].replace('\n', ' . ')) - elif message['command'] == Commands.INTERRUPT: - if synthesizing: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) - if speaking: - speaker.stop() - text_queue.clear() - pcm_queue.clear() - synthesizing = False - speaking = False - flush = False - elif message['command'] == Commands.SYNTHESIZE_FLUSH: - flush = True - - while len(text_queue) > 0: - text = text_queue.popleft() - orca_profiler.tick() - pcm = orca_stream.synthesize(text) - orca_profiler.tock(pcm) - if pcm is not None: - if delay_sec == -1: - delay_sec = time.perf_counter() - utterance_end_sec - pcm_queue.extend(pcm) - - if flush and synthesizing: - orca_profiler.tick() - pcm = orca_stream.flush() - orca_profiler.tock(pcm) - synthesizing = False - if pcm is not None: - pcm_queue.extend(pcm) - main_queue.put({ - 'command': Commands.PROFILE, - 'text': f"[Orca RTF: {orca_profiler.rtf():.2f}]\n[Delay: {delay_sec:.2f} sec]"}) - - if not speaking and len(pcm_queue) > warmup_size: - speaker.start() - speaking = True - - if speaking and len(pcm_queue) > 0: - written = speaker.write(pcm_queue) - if written > 0: - del pcm_queue[:written] - - if speaking and flush and len(pcm_queue) == 0: - speaker.flush(pcm_queue) - speaker.stop() - speaking = False - flush = False - main_queue.put({'command': Commands.START}) - finally: - orca_stream.close() - orca.delete() - speaker.delete() +if __name__ == '__main__': + if not sys.platform.lower().startswith('win'): + print('Error: Only runs on Windows platforms') + exit(1) -def main() -> None: parser = ArgumentParser() + parser.add_argument( + '--config', + help='path to a json config file to load the arguments from') parser.add_argument( '--access_key', - required=True, help='`AccessKey` obtained from `Picovoice Console` (https://console.picovoice.ai/).') parser.add_argument( '--picollm_model_path', - required=True, help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( '--keyword-model_path', @@ -413,7 +546,6 @@ def main() -> None: parser.add_argument( '--cheetah_endpoint_duration_sec', type=float, - default=1., help="Duration of silence (pause) after the user's utterance to consider it the end of the utterance.") parser.add_argument( '--picollm_device', @@ -426,24 +558,20 @@ def main() -> None: parser.add_argument( '--picollm_completion_token_limit', type=int, - default=256, help="Maximum number of tokens in the completion. 
Set to `None` to impose no limit.")
     parser.add_argument(
         '--picollm_presence_penalty',
         type=float,
-        default=0.,
         help="It penalizes logits already appearing in the partial completion if set to a positive value. If set to "
              "`0.0`, it has no effect.")
     parser.add_argument(
         '--picollm_frequency_penalty',
         type=float,
-        default=0.,
         help="If set to a positive floating-point value, it penalizes logits proportional to the frequency of their "
              "appearance in the partial completion. If set to `0.0`, it has no effect.")
     parser.add_argument(
         '--picollm_temperature',
         type=float,
-        default=0.,
         help="Sampling temperature. Temperature is a non-negative floating-point value that controls the randomness of "
              "the sampler. A higher temperature smoothens the samplers' output, increasing the randomness. In "
              "contrast, a lower temperature creates a narrower distribution and reduces variability. Setting it to "
              "`0` selects the maximum logit during sampling.")
     parser.add_argument(
         '--picollm_top_p',
         type=float,
-        default=1.,
         help="A positive floating-point number within (0, 1]. It restricts the sampler's choices to high-probability "
              "logits that form the `top_p` portion of the probability mass. Hence, it avoids randomly selecting "
              "unlikely logits. A value of `1.` enables the sampler to pick any token with non-zero probability, "
              "turning off the feature.")
+    parser.add_argument(
+        '--picollm_system_prompt',
+        type=str,
+        help="A text prompt to give to the LLM prior to its input to instruct it on how to behave."
+    )
     parser.add_argument(
         '--orca_warmup_sec',
         type=float,
-        default=0.,
         help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower "
              "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.")
-    parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
+    parser.add_argument(
+        '--porcupine_sensitivity',
+        type=float,
+        help="Sensitivity for detecting keywords.")
     parser.add_argument('--short_answers', action='store_true')
+    parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
     args = parser.parse_args()

-    access_key = args.access_key
-    picollm_model_path = args.picollm_model_path
-    keyword_model_path = args.keyword_model_path
-    cheetah_endpoint_duration_sec = args.cheetah_endpoint_duration_sec
-    picollm_device = args.picollm_device
-    picollm_completion_token_limit = args.picollm_completion_token_limit
-    picollm_presence_penalty = args.picollm_presence_penalty
-    picollm_frequency_penalty = args.picollm_frequency_penalty
-    picollm_temperature = args.picollm_temperature
-    picollm_top_p = args.picollm_top_p
-    orca_warmup_sec = args.orca_warmup_sec
-    profile = args.profile
-    short_answers = args.short_answers
-
-    main_queue = Queue()
-    listen_queue = Queue()
-    generate_queue = Queue()
-    speak_queue = Queue()
-
-    listen_process = Process(target=listen_worker, args=(
-        main_queue,
-        listen_queue,
-        access_key,
-        keyword_model_path,
-        cheetah_endpoint_duration_sec
-    ))
-    generate_process = Process(target=generate_worker, args=(
-        main_queue,
-        generate_queue,
-        access_key,
-        picollm_model_path,
-        picollm_device,
-        picollm_completion_token_limit,
-        picollm_presence_penalty,
-        picollm_frequency_penalty,
-        picollm_temperature,
-        picollm_top_p,
-        short_answers
-    ))
-    speak_process = Process(target=speak_worker, args=(
-        main_queue,
-        speak_queue,
-        access_key,
-        orca_warmup_sec
-    ))
-
-    def handler(_, __) -> None:
-        main_queue.put({'command': Commands.CLOSE})
- - signal.signal(signal.SIGINT, handler) + if args.config is not None: + config_path = os.path.realpath(args.config) + else: + config_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'config.json') + + if os.path.exists(config_path): + with open(config_path, 'r') as fd: + config = json.load(fd) + elif args.config is not None: + print(parser.error(f'File {config_path} does not exist')) + exit(1) + else: + config = {} - generate_process.start() - listen_process.start() - speak_process.start() - - modules = [ - 'Porcupine', - 'Cheetah', - 'PvRecorder', - 'picoLLM', - 'Orca', - 'PvSpeaker' + REQUIRED_ARGS = [ + 'access_key', + 'picollm_model_path' ] + DEFAULT_ARGS = { + 'access_key': '', + 'picollm_model_path': '', + 'cheetah_endpoint_duration_sec': 1, + 'picollm_device': 'best', + 'picollm_completion_token_limit': 256, + 'picollm_presence_penalty': 0, + 'picollm_frequency_penalty': 0, + 'picollm_temperature': 0, + 'picollm_top_p': 1, + 'picollm_system_prompt': None, + 'orca_warmup_sec': 0, + 'porcupine_sensitivity': 0.5, + 'short_answers': False, + 'profile': False + } - try: - close = False - listening = False - generating = False - while not close: - while main_queue.empty(): - time.sleep(0.01) - - message = main_queue.get(block=True) - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.INIT: - print(f"→ {message['name']} v{message['version']}") - modules.remove(message['name']) - if len(modules) == 0: - main_queue.put({'command': Commands.START}) - listen_queue.put({'command': Commands.START}) - generate_queue.put({'command': Commands.START}) - speak_queue.put({'command': Commands.START}) - elif message['command'] == Commands.START: - if not listening: - print(f"$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") - elif message['command'] == Commands.INTERRUPT: - if generating: - print() - generating = False - print("$ Wake word detected, utter your request or question ...") - print("User > ", end='', flush=True) - generate_queue.put(message) - speak_queue.put(message) - listening = True - elif message['command'] == Commands.TEXT: - print(message['text'], end='', flush=True) - elif message['command'] == Commands.GENERATE: - print() - generate_queue.put(message) - listening = False - elif message['command'] == Commands.SYNTHESIZE_START: - wake_word = '`Picovoice`' if keyword_model_path is None else 'the wake word' - print(f"LLM (say {wake_word} to interrupt) > ", end='', flush=True) - speak_queue.put(message) - generating = True - elif message['command'] == Commands.SYNTHESIZE: - print(message['text'], end='', flush=True) - speak_queue.put(message) - elif message['command'] == Commands.SYNTHESIZE_FLUSH: - print() - speak_queue.put(message) - generating = False - elif message['command'] == Commands.PROFILE: - if profile: - print(message['text']) - finally: - generate_queue.put({'command': Commands.INTERRUPT}) - speak_queue.put({'command': Commands.INTERRUPT}) - - listen_queue.put({'command': Commands.CLOSE}) - generate_queue.put({'command': Commands.CLOSE}) - speak_queue.put({'command': Commands.CLOSE}) + for key in chain(REQUIRED_ARGS, DEFAULT_ARGS): + arg = getattr(args, key) + if arg is not None: + config[key] = arg - listen_process.join() - generate_process.join() - speak_process.join() + missing = [f'--{arg}' for arg in REQUIRED_ARGS if arg not in config] + if len(missing) > 0: + print(parser.error('the following arguments are required: ' + ', '.join(missing))) + exit(1) + for key in DEFAULT_ARGS: + 
if key not in config: + config[key] = DEFAULT_ARGS[key] -if __name__ == '__main__': - main() + main(config) From 84bf62c8398f518479a3cd7e314dc1058ad9f2f5 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Mon, 30 Dec 2024 12:46:08 -0800 Subject: [PATCH 5/9] updated to include print statements --- .../python/{ => cli}/README.md | 0 .../python/{ => cli}/main.py | 112 +++++++++++++++--- .../python/cli/requirements.txt | 6 + .../python/windows_gui/README.md | 45 +++++++ .../{windows_gui.py => windows_gui/main.py} | 0 .../python/{ => windows_gui}/requirements.txt | 0 6 files changed, 144 insertions(+), 19 deletions(-) rename recipes/llm-voice-assistant/python/{ => cli}/README.md (100%) rename recipes/llm-voice-assistant/python/{ => cli}/main.py (85%) create mode 100644 recipes/llm-voice-assistant/python/cli/requirements.txt create mode 100644 recipes/llm-voice-assistant/python/windows_gui/README.md rename recipes/llm-voice-assistant/python/{windows_gui.py => windows_gui/main.py} (100%) rename recipes/llm-voice-assistant/python/{ => windows_gui}/requirements.txt (100%) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/cli/README.md similarity index 100% rename from recipes/llm-voice-assistant/python/README.md rename to recipes/llm-voice-assistant/python/cli/README.md diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/cli/main.py similarity index 85% rename from recipes/llm-voice-assistant/python/main.py rename to recipes/llm-voice-assistant/python/cli/main.py index 2be1b65..50bb1c8 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/cli/main.py @@ -52,6 +52,11 @@ def rtf(self) -> float: self._audio_sec = 0. return rtf + def reset(self) -> None: + self._compute_sec = 0. + self._audio_sec = 0. + self._tick_sec = 0. + class TPSProfiler(object): def __init__(self) -> None: @@ -70,6 +75,10 @@ def tps(self) -> float: self._start_sec = 0. return tps + def reset(self) -> None: + self._num_tokens = 0 + self._start_sec = 0. 
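For orientation, the `reset` methods added above complement the existing tick/tock accounting: `tick()` starts a compute timer, `tock()` accumulates the elapsed compute time along with audio duration (RTF) or token count (TPS), and `rtf()`/`tps()` report the ratio. A minimal usage sketch, assuming an open Cheetah handle and an audio frame `pcm` as elsewhere in this recipe:

```python
# Sketch of the real-time-factor accounting the reset() methods support.
profiler = RTFProfiler(cheetah.sample_rate)

profiler.tick()                    # start the compute timer
partial_transcript, endpoint_reached = cheetah.process(pcm)
profiler.tock(pcm)                 # add compute time and audio duration

print(f'[Cheetah RTF: {round(profiler.rtf(), 2)}]')  # compute_sec / audio_sec
profiler.reset()                   # clear all tallies between interactions
```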
+ class CompletionText(object): def __init__(self, stop_phrases: list) -> None: @@ -111,9 +120,10 @@ class Speaker: def __init__( self, speaker: PvSpeaker, - orca_warmup_sec: int): + config): self.speaker = speaker - self.orca_warmup = self.speaker.sample_rate * orca_warmup_sec + self.config = config + self.orca_warmup = self.speaker.sample_rate * self.config['orca_warmup_sec'] self.started = False self.speaking = False self.flushing = False @@ -160,7 +170,7 @@ def stop(): self.future = self.executor.submit(stop) if self.future and self.future.done(): self.future = None - ppn_prompt = config['ppn_prompt'] + ppn_prompt = self.config['ppn_prompt'] print(f'$ Say {ppn_prompt} ...', flush=True) @@ -169,18 +179,20 @@ def __init__( self, speaker: Speaker, orca_connection: Connection, - orca_process: Process): + orca_process: Process, + config): self.speaker = speaker self.orca_connection = orca_connection self.orca_process = orca_process + self.config = config def close(self): self.orca_connection.send({'command': Commands.CLOSE}) self.orca_process.join() - def start(self): + def start(self, utterance_end_sec): self.speaker.start() - self.orca_connection.send({'command': Commands.START}) + self.orca_connection.send({'command': Commands.START, 'utterance_end_sec': utterance_end_sec}) def process(self, text: str): self.orca_connection.send({'command': Commands.PROCESS, 'text': text}) @@ -200,6 +212,11 @@ def tick(self): if message['command'] == Commands.SPEAK: self.speaker.process(message['pcm']) elif message['command'] == Commands.FLUSH: + if self.config['profile']: + rtf = message['profile'] + delay = message['delay'] + print(f'[Orca RTF: {round(rtf, 2)}]') + print(f"[Delay: {round(delay, 2)} sec]") self.speaker.flush() @staticmethod @@ -218,6 +235,11 @@ def handler(_, __) -> None: orca = pvorca.create(access_key=config['access_key']) orca_stream = orca.stream_open() connection.send(orca.sample_rate) + connection.send({'version': orca.version}) + + orca_profiler = RTFProfiler(orca.sample_rate) + utterance_end_sec = 0 + delay_sec = -1 try: close = False @@ -231,6 +253,7 @@ def handler(_, __) -> None: close = True elif message['command'] == Commands.START: synthesizing = True + utterance_end_sec = message['utterance_end_sec'] elif message['command'] == Commands.PROCESS: if synthesizing: text_queue.put(message['text']) @@ -243,17 +266,28 @@ def handler(_, __) -> None: text_queue.get() orca_stream.flush() connection.send({'command': Commands.INTERRUPT}) + orca_profiler.reset() + utterance_end_sec = 0 + delay_sec = -1 if not text_queue.empty(): text = text_queue.get() + orca_profiler.tick() pcm = orca_stream.synthesize(text) + orca_profiler.tock(pcm) if pcm is not None: connection.send({'command': Commands.SPEAK, 'pcm': pcm}) + if delay_sec == -1: + delay_sec = time.perf_counter() - utterance_end_sec if synthesizing and flushing and text_queue.empty(): synthesizing = False flushing = False + orca_profiler.tick() pcm = orca_stream.flush() + orca_profiler.tock(pcm) connection.send({'command': Commands.SPEAK, 'pcm': pcm}) - connection.send({'command': Commands.FLUSH}) + connection.send({'command': Commands.FLUSH, 'profile': orca_profiler.rtf(), 'delay': delay_sec}) + utterance_end_sec = 0 + delay_sec = -1 elif flushing: flushing = False finally: @@ -266,20 +300,22 @@ def __init__( self, synthesizer: Synthesizer, pllm_connection: Connection, - pllm_process: Process): + pllm_process: Process, + config): self.synthesizer = synthesizer self.pllm_connection = pllm_connection self.pllm_process = pllm_process + 
self.config = config
 
     def close(self):
         self.pllm_connection.send({'command': Commands.CLOSE})
         self.pllm_process.join()
 
-    def process(self, text: str):
-        ppn_prompt = config['ppn_prompt']
+    def process(self, text: str, utterance_end_sec):
+        ppn_prompt = self.config['ppn_prompt']
         print(f'LLM (say {ppn_prompt} to interrupt) > ', end='', flush=True)
 
-        self.synthesizer.start()
+        self.synthesizer.start(utterance_end_sec)
         self.pllm_connection.send({'command': Commands.PROCESS, 'text': text})
 
     def interrupt(self):
@@ -297,6 +333,9 @@ def tick(self):
                 self.synthesizer.process(message['text'])
             elif message['command'] == Commands.FLUSH:
                 print('', flush=True)
+                if self.config['profile']:
+                    tps = message['profile']
+                    print(f'[picoLLM TPS: {round(tps, 2)}]')
                 self.synthesizer.flush()
 
     @staticmethod
@@ -316,12 +355,17 @@ def handler(_, __) -> None:
             access_key=config['access_key'],
             model_path=config['picollm_model_path'],
             device=config['picollm_device'])
+
+        connection.send({'version': pllm.version, 'model': pllm.model})
+
         if config['picollm_system_prompt'] is not None:
             dialog = pllm.get_dialog(system=config['picollm_system_prompt'])
         else:
             dialog = pllm.get_dialog()
 
         generating = False
+        pllm_profiler = TPSProfiler()
+
         stop_phrases = {
             '</s>',  # Llama-2, Mistral, and Mixtral
             '<end_of_turn>',  # Gemma
@@ -332,6 +376,7 @@ def handler(_, __) -> None:
         completion = CompletionText(stop_phrases)
 
         def llm_callback(text):
+            pllm_profiler.tock()
             if generating:
                 completion.append(text)
                 new_tokens = completion.get_new_tokens()
@@ -368,6 +413,7 @@ def llm_task(text):
             elif message['command'] == Commands.PROCESS:
                 generating = True
                 text = message['text']
+                pllm_profiler.reset()
                 llm_future = executor.submit(llm_task, text)
             elif message['command'] == Commands.INTERRUPT:
                 interrupting = True
@@ -381,7 +427,7 @@ def llm_task(text):
                 interrupting = False
                 connection.send({'command': Commands.INTERRUPT})
             else:
-                connection.send({'command': Commands.FLUSH})
+                connection.send({'command': Commands.FLUSH, 'profile': pllm_profiler.tps()})
             llm_future = None
         if not llm_future and interrupting:
             interrupting = False
@@ -398,10 +444,14 @@ def __init__(
         self,
         generator: Generator,
         porcupine: pvporcupine.Porcupine,
-        cheetah: pvcheetah.Cheetah):
+        cheetah: pvcheetah.Cheetah,
+        config):
         self.generator = generator
         self.porcupine = porcupine
         self.cheetah = cheetah
+        self.config = config
+        self.porcupine_profiler = RTFProfiler(porcupine.sample_rate)
+        self.cheetah_profiler = RTFProfiler(cheetah.sample_rate)
         self.sleeping = True
         self.listening = False
@@ -413,23 +463,37 @@ def close(self):
 
     def process(self, pcm: Optional[Sequence[int]]):
         if self.sleeping:
-            if self.porcupine.process(pcm) == 0:
+            self.porcupine_profiler.tick()
+            wake_word_detected = self.porcupine.process(pcm) == 0
+            self.porcupine_profiler.tock(pcm)
+            if wake_word_detected:
                 self.sleeping = False
                 self.tick_count = 4
                 self.generator.interrupt()
+                if self.config['profile']:
+                    print(f'[Porcupine RTF: {round(self.porcupine_profiler.rtf(), 2)}]')
+                self.porcupine_profiler.reset()
+                self.cheetah_profiler.reset()
         elif self.listening:
+            self.cheetah_profiler.tick()
             partial_transcript, endpoint_reached = self.cheetah.process(pcm)
+            self.cheetah_profiler.tock(pcm)
             if len(partial_transcript) > 0:
                 self.user_request += partial_transcript
                 print(partial_transcript, end='', flush=True)
             if endpoint_reached:
+                utterance_end_sec = time.perf_counter()
                 self.sleeping = True
                 self.listening = False
+                self.cheetah_profiler.tick()
                 remaining_transcript = self.cheetah.flush()
+                self.cheetah_profiler.tock()
                 if
len(remaining_transcript) > 0: self.user_request += remaining_transcript print(remaining_transcript, flush=True) - self.generator.process(self.user_request) + if self.config['profile']: + print(f'[Cheetah RTF: {round(self.cheetah_profiler.rtf(), 2)}]') + self.generator.process(self.user_request, utterance_end_sec) self.user_request = '' elif self.tick_count > 0: self.tick_count -= 1 @@ -482,18 +546,28 @@ def handler(_, __) -> None: sensitivities=[config['porcupine_sensitivity']]) config['ppn_prompt'] = 'the wake word' + print(f"→ Porcupine v{porcupine.version}") + cheetah = pvcheetah.create( access_key=config['access_key'], endpoint_duration_sec=config['cheetah_endpoint_duration_sec'], enable_automatic_punctuation=True) + + print(f"→ Cheetah v{cheetah.version}") pv_recorder = PvRecorder(frame_length=porcupine.frame_length) pv_speaker = PvSpeaker(sample_rate=int(orca_connection.recv()), bits_per_sample=16, buffer_size_secs=1) - speaker = Speaker(pv_speaker, config['orca_warmup_sec']) - synthesizer = Synthesizer(speaker, orca_connection, orca_process) - generator = Generator(synthesizer, pllm_connection, pllm_process) - listener = Listener(generator, porcupine, cheetah) + pllm_info = pllm_connection.recv() + print(f"→ picoLLM v{pllm_info['version']} <{pllm_info['model']}>") + + orca_info = orca_connection.recv() + print(f"→ Orca v{orca_info['version']}") + + speaker = Speaker(pv_speaker, config) + synthesizer = Synthesizer(speaker, orca_connection, orca_process, config) + generator = Generator(synthesizer, pllm_connection, pllm_process, config) + listener = Listener(generator, porcupine, cheetah, config) recorder = Recorder(listener, pv_recorder) ppn_prompt = config['ppn_prompt'] diff --git a/recipes/llm-voice-assistant/python/cli/requirements.txt b/recipes/llm-voice-assistant/python/cli/requirements.txt new file mode 100644 index 0000000..f0cf97c --- /dev/null +++ b/recipes/llm-voice-assistant/python/cli/requirements.txt @@ -0,0 +1,6 @@ +picollm==1.2.3 +pvcheetah==2.0.1 +pvorca==1.0.0 +pvporcupine==3.0.2 +pvrecorder==1.2.2 +pvspeaker==1.0.3 \ No newline at end of file diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md new file mode 100644 index 0000000..85bf491 --- /dev/null +++ b/recipes/llm-voice-assistant/python/windows_gui/README.md @@ -0,0 +1,45 @@ +## Compatibility + +- Python 3.8+ +- Runs on Windows (x86_64). + +## AccessKey + +AccessKey is your authentication and authorization token for deploying Picovoice SDKs, including picoLLM. Anyone who is +using Picovoice needs to have a valid AccessKey. You must keep your AccessKey secret. You would need internet +connectivity to validate your AccessKey with Picovoice license servers even though the LLM inference is running 100% +offline and completely free for open-weight models. Everyone who signs up for +[Picovoice Console](https://console.picovoice.ai/) receives a unique AccessKey. + +## picoLLM Model + +picoLLM Inference Engine supports many open-weight models. The models are on +[Picovoice Console](https://console.picovoice.ai/). + +## Usage + +Install the required packages: + +```console +pip install -r requirements.txt +``` + +Run the demo: + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} +``` + +Replace `${ACCESS_KEY}` with yours obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path to the +model downloaded from Picovoice Console. 
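Optional flags are appended the same way; for instance, passing a custom wake word model (covered in the Custom Wake Word section below) might look like the following, where `${KEYWORD_MODEL_PATH}` is a hypothetical placeholder for the path to a trained `.ppn` file:

```console
python3 main.py \
    --access_key ${ACCESS_KEY} \
    --picollm_model_path ${PICOLLM_MODEL_PATH} \
    --keyword_model_path ${KEYWORD_MODEL_PATH}
```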
+
+To see all available options, type the following:
+
+```console
+python main.py --help
+```
+
+## Custom Wake Word
+
+The demo's default wake phrase is `Jarvis`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
+application using the `--keyword_model_path` argument.
\ No newline at end of file
diff --git a/recipes/llm-voice-assistant/python/windows_gui.py b/recipes/llm-voice-assistant/python/windows_gui/main.py
similarity index 100%
rename from recipes/llm-voice-assistant/python/windows_gui.py
rename to recipes/llm-voice-assistant/python/windows_gui/main.py
diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt
similarity index 100%
rename from recipes/llm-voice-assistant/python/requirements.txt
rename to recipes/llm-voice-assistant/python/windows_gui/requirements.txt

From fb8848d15bfc456f11f6f6456adda117c07b0585 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Mon, 30 Dec 2024 12:50:21 -0800
Subject: [PATCH 6/9] fixed python codestyle

---
 recipes/llm-voice-assistant/python/cli/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py
index 50bb1c8..b8609fc 100644
--- a/recipes/llm-voice-assistant/python/cli/main.py
+++ b/recipes/llm-voice-assistant/python/cli/main.py
@@ -523,6 +523,7 @@ def tick(self):
         pcm = self.recorder.read()
         self.listener.process(pcm)
 
+
 def main(config):
     stop = [False]
@@ -552,7 +553,7 @@ def handler(_, __) -> None:
         access_key=config['access_key'],
         endpoint_duration_sec=config['cheetah_endpoint_duration_sec'],
         enable_automatic_punctuation=True)
-    
+
     print(f"→ Cheetah v{cheetah.version}")
 
     pv_recorder = PvRecorder(frame_length=porcupine.frame_length)

From 70627317303c6be49851187ebee4c127683a7205 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Thu, 2 Jan 2025 11:05:50 -0800
Subject: [PATCH 7/9] added newline

---
 recipes/llm-voice-assistant/python/cli/main.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py
index b8609fc..4be536e 100644
--- a/recipes/llm-voice-assistant/python/cli/main.py
+++ b/recipes/llm-voice-assistant/python/cli/main.py
@@ -499,7 +499,7 @@ def process(self, pcm: Optional[Sequence[int]]):
                 self.tick_count -= 1
             else:
                 self.listening = True
-                print('$ Wake word detected, utter your request or question ...', flush=True)
+                print('\n$ Wake word detected, utter your request or question ...', flush=True)
                 print('User > ', end='', flush=True)
@@ -601,10 +601,6 @@ def handler(_, __) -> None:
 
 if __name__ == '__main__':
-    if not sys.platform.lower().startswith('win'):
-        print('Error: Only runs on Windows platforms')
-        exit(1)
-
     parser = ArgumentParser()
     parser.add_argument(
         '--config',

From 7faf7d92784a1be92c135308e032c88487febbb5 Mon Sep 17 00:00:00 2001
From: Matthew Maxwell
Date: Thu, 2 Jan 2025 12:50:30 -0800
Subject: [PATCH 8/9] updated readme

---
 recipes/llm-voice-assistant/python/cli/README.md | 15 +++++++++++++++
 .../python/cli/requirements.txt | 2 +-
 .../python/windows_gui/README.md | 15 +++++++++++++++
 .../python/windows_gui/requirements.txt | 2 +-
 4 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/cli/README.md
b/recipes/llm-voice-assistant/python/cli/README.md
index 3bd2cb1..f763480 100644
--- a/recipes/llm-voice-assistant/python/cli/README.md
+++ b/recipes/llm-voice-assistant/python/cli/README.md
@@ -1,3 +1,7 @@
+# LLM Voice Assistant CLI Demo
+
+A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a text based interface.
+
 ## See It In Action!
 
 [![LLM VA in Action](https://img.youtube.com/vi/06K_YtUr8mc/0.jpg)](https://www.youtube.com/watch?v=06K_YtUr8mc)
@@ -43,6 +47,17 @@ To see all available options, type the following:
 python main.py --help
 ```
 
+## Config File
+
+In addition to command line arguments, a config file can be used to pass arguments to the demo. By default, the demo looks for `config.json` in the same directory as `main.py`, but an alternative path can be passed using the `--config` option. Below is an example config file.
+
+```json
+{
+  "access_key": "${ACCESS_KEY}",
+  "picollm_model_path": "${PICOLLM_MODEL_PATH}"
+}
+```
+
 ## Custom Wake Word
 
 The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
diff --git a/recipes/llm-voice-assistant/python/cli/requirements.txt b/recipes/llm-voice-assistant/python/cli/requirements.txt
index f0cf97c..5f73eac 100644
--- a/recipes/llm-voice-assistant/python/cli/requirements.txt
+++ b/recipes/llm-voice-assistant/python/cli/requirements.txt
@@ -1,5 +1,5 @@
 picollm==1.2.3
-pvcheetah==2.0.1
+pvcheetah==2.1.0
 pvorca==1.0.0
 pvporcupine==3.0.2
 pvrecorder==1.2.2
diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md
index 85bf491..a3ae570 100644
--- a/recipes/llm-voice-assistant/python/windows_gui/README.md
+++ b/recipes/llm-voice-assistant/python/windows_gui/README.md
@@ -1,3 +1,7 @@
+# LLM Voice Assistant GUI Demo
+
+A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a console based graphical interface.
+
 ## Compatibility
 
 - Python 3.8+
@@ -39,6 +43,17 @@ To see all available options, type the following:
 python main.py --help
 ```
 
+## Config File
+
+In addition to command line arguments, a config file can be used to pass arguments to the demo. By default, the demo looks for `config.json` in the same directory as `main.py`, but an alternative path can be passed using the `--config` option. Below is an example config file.
+
+```json
+{
+  "access_key": "${ACCESS_KEY}",
+  "picollm_model_path": "${PICOLLM_MODEL_PATH}"
+}
+```
+
 ## Custom Wake Word
 
 The demo's default wake phrase is `Jarvis`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/).
Once you have the model trained, simply pass it to the demo diff --git a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt index 3c73f69..2de6e31 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt +++ b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt @@ -1,5 +1,5 @@ picollm==1.2.3 -pvcheetah==2.0.1 +pvcheetah==2.1.0 pvorca==1.0.0 pvporcupine==3.0.2 pvrecorder==1.2.2 From 54b0586a9cb6d8160e0098b536c2d44812c75890 Mon Sep 17 00:00:00 2001 From: matt200-ok Date: Thu, 2 Jan 2025 13:52:18 -0800 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Ian Lavery --- recipes/llm-voice-assistant/python/cli/README.md | 4 ++-- recipes/llm-voice-assistant/python/windows_gui/README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/llm-voice-assistant/python/cli/README.md b/recipes/llm-voice-assistant/python/cli/README.md index f763480..7c64a06 100644 --- a/recipes/llm-voice-assistant/python/cli/README.md +++ b/recipes/llm-voice-assistant/python/cli/README.md @@ -1,6 +1,6 @@ -# LLM Voice Assistant CLI Demo +# Cross-Platform LLM Voice Assistant CLI Demo -A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a text based interface. +A cross-platform voice assistant using Picovoice's Wake Word, STT, TTS and LLM technology with a text-based interface. ## See It In Action! diff --git a/recipes/llm-voice-assistant/python/windows_gui/README.md b/recipes/llm-voice-assistant/python/windows_gui/README.md index a3ae570..d01bc46 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/README.md +++ b/recipes/llm-voice-assistant/python/windows_gui/README.md @@ -1,6 +1,6 @@ -# LLM Voice Assistant GUI Demo +# Windows LLM Voice Assistant GUI Demo -A voice assistant using Porcupine, Cheetah, picoLLM, and Orca with a console based graphical interface. +A voice assistant for Windows using Picovoice's Wake Word, STT, TTS and LLM technology with a console-based graphical interface. ## Compatibility