Windows Arm64 #37

Merged · 13 commits · Jan 28, 2025
12 changes: 6 additions & 6 deletions recipes/llm-voice-assistant/nodejs/package.json
@@ -14,12 +14,12 @@
"author": "Picovoice Inc.",
"license": "Apache-2.0",
"dependencies": {
"@picovoice/cheetah-node": "^2.1.0",
"@picovoice/orca-node": "^1.0.0",
"@picovoice/picollm-node": "1.2.3",
"@picovoice/porcupine-node": "^3.0.3",
"@picovoice/pvrecorder-node": "^1.2.3",
"@picovoice/pvspeaker-node": "^1.0.1",
"@picovoice/cheetah-node": "^2.1.1",
"@picovoice/orca-node": "^1.0.1",
"@picovoice/picollm-node": "1.2.4",
"@picovoice/porcupine-node": "^3.0.5",
"@picovoice/pvrecorder-node": "^1.2.5",
"@picovoice/pvspeaker-node": "^1.0.2",
"@typescript-eslint/eslint-plugin": "^5.19.0",
"@typescript-eslint/parser": "^5.19.0",
"commander": "^6.1.0",
48 changes: 24 additions & 24 deletions recipes/llm-voice-assistant/nodejs/yarn.lock
@@ -74,35 +74,35 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"

"@picovoice/cheetah-node@^2.1.0":
version "2.1.0"
resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.1.0.tgz#d68a86b55bc21bf586a23f4c33563dce58b18385"
integrity sha512-B63Aqmjs2berQ/YVEooIJTSQVXrJ/naz5YG5ZG+mlyfahYaOBbudXAdQP4FxTiQBlGLNhQSqZjllgoEs394n1Q==
"@picovoice/cheetah-node@^2.1.1":
version "2.1.1"
resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.1.1.tgz#ecc1762c56b090670853b52def624574d4cca015"
integrity sha512-nP+VTZYVqHF3BNz8BIuP5MAFCFJVVX6HWAQq5DStZ5c26yCUQwXs+orIlXeEpNZqlZOEvhkxbwACqFwORC3Ktw==

"@picovoice/orca-node@^1.0.0":
version "1.0.0"
resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-1.0.0.tgz#812728c3183a914eff6b3189dfa958ef4d44f2f7"
integrity sha512-YDTqJ5KsueBC4Nj0Zo287VF+/y7SRjXbOyHy8h66joJYPF0QNsz8oDCzbQO7hzymNbkFXd0crMPK+gQElvd83w==
"@picovoice/orca-node@^1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-1.0.1.tgz#455132dfc5f95c0a651e47a512d4018860b7286d"
integrity sha512-IOinMTVrqsTXhwDvqPFnoi/phgr2w6tj5XJ/DFTp+jSuzbZQaMEAOIeKptSKERuGWYkbzXdXDPZb4WRLanFYtQ==

"@picovoice/[email protected].3":
version "1.2.3"
resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.2.3.tgz#e987159a308ee44b5d947213ee4401d6ff30e6ed"
integrity sha512-WdtrmMbxAUS8SDr98VRG+LulGe8UvRclu0+bBnOnYdOfEG7+XWHzC1ddsXRyN9mdmpPJj5T0xxQ1uk9dN2j+aw==
"@picovoice/[email protected].4":
version "1.2.4"
resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.2.4.tgz#f0dc0db5195737d731df99c91dde3e4ce1bc4ca5"
integrity sha512-UlJtMkn8Rl9QdPklGCngGAJaCMwnVQQRHucinZ33gplezRWCsgFRW8SdqE7D7EyK8bILJwc67Ngdtg19kyI08w==

"@picovoice/porcupine-node@^3.0.3":
version "3.0.4"
resolved "https://registry.yarnpkg.com/@picovoice/porcupine-node/-/porcupine-node-3.0.4.tgz#a4707810c99231d50f83e802907d2921b18c4941"
integrity sha512-cWFJSzQmEH45xVgfJ3z6S1pp0+0Tuki+7sBqamNa7K/s9j0ynW9WpJBeuMztRPrpwrDoAc7A2MZdK5UJ5wIebA==
"@picovoice/porcupine-node@^3.0.5":
version "3.0.5"
resolved "https://registry.yarnpkg.com/@picovoice/porcupine-node/-/porcupine-node-3.0.5.tgz#d23af28b5a9cccc87c4f8c1ad18f83a96cc679f5"
integrity sha512-1N3Pf9eeLBP3XQcPgjNUDmUKxD7Wso65XtgpSnUiiCRvHhBD9GehfrJObx7nwFSazC6y/1uNWBnOJ32FKudPhQ==

"@picovoice/pvrecorder-node@^1.2.3":
version "1.2.4"
resolved "https://registry.yarnpkg.com/@picovoice/pvrecorder-node/-/pvrecorder-node-1.2.4.tgz#1e67a1f82a144ad3c8e3f77c18fdbfe1ab5880cf"
integrity sha512-s8l6LtJnTHZ+FfIgXJZ9d8pKONSWs04v5q83F2zmfRr9IV1m7SQ5RlsmL0FO7NsB0GjIar3qHndryAQCjgSInw==
"@picovoice/pvrecorder-node@^1.2.5":
version "1.2.5"
resolved "https://registry.yarnpkg.com/@picovoice/pvrecorder-node/-/pvrecorder-node-1.2.5.tgz#a47e11d347979ef0f1b083657ff69dd9bd2efe5c"
integrity sha512-bnid5oInf22JRdrZ75z8ooewOza0whYI9w/oSQSZbkDvddylPPrY6x+1L1qIyf5Tb8ZtzKQL+aQ9m8SazgRHFg==

"@picovoice/pvspeaker-node@^1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@picovoice/pvspeaker-node/-/pvspeaker-node-1.0.1.tgz#0524c9509a88eb50f3975cbe3c5012058a54619b"
integrity sha512-4/QBYcp3GZ+2ewq2QuyCn7pnqXdfwj90qbINecKtBsCXKLtl/XHLmXOqgfoy+RBSXmA5wjV/LuOHvqVibrxAuQ==
"@picovoice/pvspeaker-node@^1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@picovoice/pvspeaker-node/-/pvspeaker-node-1.0.2.tgz#5603864038ba7f2a8ce7f82507f68f4e3f1568b8"
integrity sha512-x7MTp6pdon7Dce1lp0yiM8wQcKkYt9jyLqiK2hjUuq85vGlILBpUfKCspuEUVZnyG/5tMic6VsGfXgET7n+O7A==

"@types/json-schema@^7.0.9":
version "7.0.15"
185 changes: 92 additions & 93 deletions recipes/llm-voice-assistant/python/cli/main.py
@@ -4,10 +4,10 @@
import sys
import time
from argparse import ArgumentParser
from concurrent.futures import ThreadPoolExecutor
from itertools import chain
from multiprocessing import Event, Pipe, Process, Queue, active_children
from multiprocessing.connection import Connection
from threading import Thread
from typing import Optional, Sequence


@@ -128,11 +128,10 @@ def __init__(
self.speaking = False
self.flushing = False
self.pcmBuffer = []
self.executor = ThreadPoolExecutor()
self.future = None

def close(self):
self.executor.shutdown()
self.interrupt()

def start(self):
self.started = True
@@ -156,6 +155,8 @@ def tick(self):
def stop():
self.speaker.flush()
self.speaker.stop()
ppn_prompt = self.config['ppn_prompt']
print(f'$ Say {ppn_prompt} ...', flush=True)
if not self.speaking and len(self.pcmBuffer) > self.orca_warmup:
self.speaking = True
self.speaker.start()
@@ -167,11 +168,7 @@ def stop():
self.started = False
self.speaking = False
self.flushing = False
self.future = self.executor.submit(stop)
if self.future and self.future.done():
self.future = None
ppn_prompt = self.config['ppn_prompt']
print(f'$ Say {ppn_prompt} ...', flush=True)
Thread(target=stop).start()


class Synthesizer:
@@ -187,8 +184,12 @@ def __init__(
self.config = config

def close(self):
self.orca_connection.send({'command': Commands.CLOSE})
self.orca_process.join()
try:
self.orca_connection.send({'command': Commands.CLOSE})
self.orca_process.join(1.0)
except Exception as e:
sys.stderr.write(str(e))
self.orca_process.kill()

def start(self, utterance_end_sec):
self.speaker.start()
@@ -201,10 +202,11 @@ def flush(self):
self.orca_connection.send({'command': Commands.FLUSH})

def interrupt(self):
self.orca_connection.send({'command': Commands.INTERRUPT})
while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT:
time.sleep(0.01)
self.speaker.interrupt()
try:
self.orca_connection.send({'command': Commands.INTERRUPT})
self.speaker.interrupt()
except Exception as e:
sys.stderr.write(str(e))

def tick(self):
while self.orca_connection.poll():
@@ -233,7 +235,7 @@ def handler(_, __) -> None:
signal.signal(signal.SIGINT, handler)

orca = pvorca.create(access_key=config['access_key'])
orca_stream = orca.stream_open()
orca_stream = orca.stream_open(speech_rate=config['orca_speech_rate'])
connection.send(orca.sample_rate)
connection.send({'version': orca.version})

@@ -247,10 +249,15 @@ def handler(_, __) -> None:
flushing = False
text_queue = Queue()
while not close:
time.sleep(0.1)
while connection.poll():
message = connection.recv()
if message['command'] == Commands.CLOSE:
close = True
synthesizing = False
flushing = False
while not text_queue.empty():
text_queue.get()
elif message['command'] == Commands.START:
synthesizing = True
utterance_end_sec = message['utterance_end_sec']
@@ -265,19 +272,19 @@ def handler(_, __) -> None:
while not text_queue.empty():
text_queue.get()
orca_stream.flush()
connection.send({'command': Commands.INTERRUPT})
orca_profiler.reset()
utterance_end_sec = 0
delay_sec = -1
if not text_queue.empty():
while not text_queue.empty():
text = text_queue.get()
orca_profiler.tick()
pcm = orca_stream.synthesize(text)
orca_profiler.tock(pcm)
if pcm is not None:
connection.send({'command': Commands.SPEAK, 'pcm': pcm})
if delay_sec == -1:
delay_sec = time.perf_counter() - utterance_end_sec
if synthesizing:
orca_profiler.tick()
pcm = orca_stream.synthesize(text)
orca_profiler.tock(pcm)
if pcm is not None:
connection.send({'command': Commands.SPEAK, 'pcm': pcm})
if delay_sec == -1:
delay_sec = time.perf_counter() - utterance_end_sec
if synthesizing and flushing and text_queue.empty():
synthesizing = False
flushing = False
@@ -308,21 +315,22 @@ def __init__(
self.config = config

def close(self):
self.pllm_connection.send({'command': Commands.CLOSE})
self.pllm_process.join()
try:
self.pllm_connection.send({'command': Commands.CLOSE})
self.pllm_process.join(1.0)
except Exception as e:
sys.stderr.write(str(e))
self.pllm_process.kill()

def process(self, text: str, utterance_end_sec):
ppn_prompt = self.config['ppn_prompt']
print(f'LLM (say ${ppn_prompt} to interrupt) > ', end='', flush=True)
print(f'LLM (say {ppn_prompt} to interrupt) > ', end='', flush=True)

self.synthesizer.start(utterance_end_sec)
self.pllm_connection.send({'command': Commands.PROCESS, 'text': text})

def interrupt(self):
Review comment (Member): Random, and not on this line, but print(f'LLM (say ${ppn_prompt} to interrupt) > ', end='', flush=True) makes it print "...say $Picovoice to..." with an extra $. We should probably remove the $.
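A minimal sketch of the behavior the comment describes (assuming the default Picovoice wake phrase; ppn_prompt is set here only for illustration). The $ sits outside the f-string placeholder, so Python prints it literally:

ppn_prompt = 'Picovoice'  # hypothetical value for illustration

# With the stray '$', the literal dollar sign is printed verbatim:
print(f'LLM (say ${ppn_prompt} to interrupt) > ')  # -> LLM (say $Picovoice to interrupt) >

# Dropping the '$' yields the intended prompt, matching the new diff line above:
print(f'LLM (say {ppn_prompt} to interrupt) > ')   # -> LLM (say Picovoice to interrupt) >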

self.pllm_connection.send({'command': Commands.INTERRUPT})
while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT:
time.sleep(0.01)
print('', flush=True)
self.synthesizer.interrupt()

def tick(self):
@@ -362,7 +370,6 @@ def handler(_, __) -> None:
dialog = pllm.get_dialog(system=config['picollm_system_prompt'])
else:
dialog = pllm.get_dialog()
generating = False

pllm_profiler = TPSProfiler()

@@ -377,65 +384,53 @@ def handler(_, __) -> None:

def llm_callback(text):
pllm_profiler.tock()
if generating:
completion.append(text)
new_tokens = completion.get_new_tokens()
if len(new_tokens) > 0:
connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens})

def llm_task(text):
short_answers_instruction = \
"You are a voice assistant and your answers are very short but informative"
dialog.add_human_request(
f"{short_answers_instruction}. {text}" if config['short_answers'] else text)

completion.reset()
return pllm.generate(
prompt=dialog.prompt(),
completion_token_limit=config['picollm_completion_token_limit'],
stop_phrases=stop_phrases,
presence_penalty=config['picollm_presence_penalty'],
frequency_penalty=config['picollm_frequency_penalty'],
temperature=config['picollm_temperature'],
top_p=config['picollm_top_p'],
stream_callback=llm_callback)
completion.append(text)
new_tokens = completion.get_new_tokens()
if len(new_tokens) > 0:
connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens})

close = [False]
prompt = [None]

def event_manager():
while not close[0]:
message = connection.recv()
if message['command'] == Commands.CLOSE:
close[0] = True
pllm.interrupt()
return
elif message['command'] == Commands.INTERRUPT:
pllm.interrupt()
elif message['command'] == Commands.PROCESS:
prompt[0] = message['text']
Thread(target=event_manager).start()

try:
close = False
executor = ThreadPoolExecutor()
llm_future = None
interrupting = False
while not close:
while connection.poll():
message = connection.recv()
if message['command'] == Commands.CLOSE:
close = True
elif message['command'] == Commands.PROCESS:
generating = True
text = message['text']
pllm_profiler.reset()
llm_future = executor.submit(llm_task, text)
elif message['command'] == Commands.INTERRUPT:
interrupting = True
generating = False
pllm.interrupt()
if llm_future and llm_future.done():
generating = False
llm_result = llm_future.result()
dialog.add_llm_response(llm_result.completion)
if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED:
interrupting = False
connection.send({'command': Commands.INTERRUPT})
else:
while not close[0]:
if prompt[0] is not None:
short_answers_instruction = \
"You are a voice assistant and your answers are very short but informative"
dialog.add_human_request(
f"{short_answers_instruction}. {prompt[0]}" if config['short_answers'] else prompt[0])
prompt[0] = None

completion.reset()
result = pllm.generate(
prompt=dialog.prompt(),
completion_token_limit=config['picollm_completion_token_limit'],
stop_phrases=stop_phrases,
presence_penalty=config['picollm_presence_penalty'],
frequency_penalty=config['picollm_frequency_penalty'],
temperature=config['picollm_temperature'],
top_p=config['picollm_top_p'],
stream_callback=llm_callback)

dialog.add_llm_response(result.completion)
if result.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED:
connection.send({'command': Commands.FLUSH, 'profile': pllm_profiler.tps()})
llm_future = None
if not llm_future and interrupting:
interrupting = False
connection.send({'command': Commands.INTERRUPT})
else:
time.sleep(0.25)
finally:
while llm_future and llm_future.done():
time.sleep(0.01)
del executor
pllm.release()


@@ -576,23 +571,22 @@ def handler(_, __) -> None:

try:
while not stop[0]:
if not pllm_process.is_alive() or not orca_process.is_alive():
break

recorder.tick()
generator.tick()
synthesizer.tick()
speaker.tick()
finally:
generator.interrupt()
generator.tick()
synthesizer.tick()
speaker.tick()
recorder.close()
listener.close()
generator.close()
synthesizer.close()
speaker.close()

for child in active_children():
child.terminate()
child.kill()

porcupine.delete()
cheetah.delete()
@@ -612,8 +606,8 @@ def handler(_, __) -> None:
'--picollm_model_path',
help='Absolute path to the file containing LLM parameters (`.pllm`).')
parser.add_argument(
'--keyword-model_path',
help='Absolute path to the keyword model file (`.ppn`). If not set, `Picovoice` will be the wake phrase')
'--keyword_model_path',
help='Absolute path to the keyword model file (`.ppn`). If not set, `Jarvis` will be the wake phrase')
parser.add_argument(
'--cheetah_endpoint_duration_sec',
type=float,
@@ -664,6 +658,10 @@ def handler(_, __) -> None:
type=float,
help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower "
"(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.")
parser.add_argument(
'--orca_speech_rate',
type=float,
help="Rate of speech of the generated audio.")
parser.add_argument(
'--porcupine_sensitivity',
type=float,
@@ -702,6 +700,7 @@ def handler(_, __) -> None:
'picollm_top_p': 1,
'picollm_system_prompt': None,
'orca_warmup_sec': 0,
'orca_speech_rate': 1.0,
'porcupine_sensitivity': 0.5,
'short_answers': False,
'profile': False
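For reference, the new --orca_speech_rate flag added above (default 1.0) is passed through to orca.stream_open(speech_rate=...). A hypothetical invocation exercising it might look like the line below; --picollm_model_path appears in this diff, while --access_key and the model path value are assumed rather than shown here:

python recipes/llm-voice-assistant/python/cli/main.py --access_key ${YOUR_ACCESS_KEY} --picollm_model_path /path/to/model.pllm --orca_speech_rate 1.3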
12 changes: 6 additions & 6 deletions recipes/llm-voice-assistant/python/cli/requirements.txt
@@ -1,6 +1,6 @@
picollm==1.2.3
pvcheetah==2.1.0
pvorca==1.0.0
pvporcupine==3.0.2
pvrecorder==1.2.2
pvspeaker==1.0.3
picollm==1.2.4
pvcheetah==2.1.1
pvorca==1.0.1
pvporcupine==3.0.4
pvrecorder==1.2.4
pvspeaker==1.0.4
561 changes: 349 additions & 212 deletions recipes/llm-voice-assistant/python/windows_gui/main.py

Large diffs are not rendered by default.

15 changes: 7 additions & 8 deletions recipes/llm-voice-assistant/python/windows_gui/requirements.txt
@@ -1,8 +1,7 @@
picollm==1.2.3
pvcheetah==2.1.0
pvorca==1.0.0
pvporcupine==3.0.2
pvrecorder==1.2.2
pvspeaker==1.0.3
windows-curses==2.4.0; sys_platform == 'win32'
psutil==6.1.1; sys_platform == 'win32'
picollm==1.2.4
pvcheetah==2.1.1
pvorca==1.0.1
pvporcupine==3.0.4
pvrecorder==1.2.4
pvspeaker==1.0.4
psutil==6.1.1
1 change: 1 addition & 0 deletions res/.lint/spell-check/dict.txt
@@ -34,6 +34,7 @@ pvrecorder
pvspeaker
samplerate
sdcard
sigwinch
Spannable
subwin
tock