From 07854e5fc0727d4132bdc5268656125efe261881 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Wed, 15 Jan 2025 16:18:36 -0800 Subject: [PATCH 1/8] updated packages --- recipes/llm-voice-assistant/nodejs/package.json | 12 ++++++------ .../llm-voice-assistant/python/cli/requirements.txt | 12 ++++++------ .../python/windows_gui/requirements.txt | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/recipes/llm-voice-assistant/nodejs/package.json b/recipes/llm-voice-assistant/nodejs/package.json index 99fd888..fba485b 100644 --- a/recipes/llm-voice-assistant/nodejs/package.json +++ b/recipes/llm-voice-assistant/nodejs/package.json @@ -14,12 +14,12 @@ "author": "Picovoice Inc.", "license": "Apache-2.0", "dependencies": { - "@picovoice/cheetah-node": "^2.1.0", - "@picovoice/orca-node": "^1.0.0", - "@picovoice/picollm-node": "1.2.3", - "@picovoice/porcupine-node": "^3.0.3", - "@picovoice/pvrecorder-node": "^1.2.3", - "@picovoice/pvspeaker-node": "^1.0.1", + "@picovoice/cheetah-node": "^2.1.1", + "@picovoice/orca-node": "^1.0.1", + "@picovoice/picollm-node": "1.2.4", + "@picovoice/porcupine-node": "^3.0.5", + "@picovoice/pvrecorder-node": "^1.2.5", + "@picovoice/pvspeaker-node": "^1.0.2", "@typescript-eslint/eslint-plugin": "^5.19.0", "@typescript-eslint/parser": "^5.19.0", "commander": "^6.1.0", diff --git a/recipes/llm-voice-assistant/python/cli/requirements.txt b/recipes/llm-voice-assistant/python/cli/requirements.txt index 5f73eac..e3afb4e 100644 --- a/recipes/llm-voice-assistant/python/cli/requirements.txt +++ b/recipes/llm-voice-assistant/python/cli/requirements.txt @@ -1,6 +1,6 @@ -picollm==1.2.3 -pvcheetah==2.1.0 -pvorca==1.0.0 -pvporcupine==3.0.2 -pvrecorder==1.2.2 -pvspeaker==1.0.3 \ No newline at end of file +picollm==1.2.4 +pvcheetah==2.1.1 +pvorca==1.0.1 +pvporcupine==3.0.4 +pvrecorder==1.2.4 +pvspeaker==1.0.4 \ No newline at end of file diff --git a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt index 2de6e31..1e9385f 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt +++ b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt @@ -1,8 +1,8 @@ -picollm==1.2.3 -pvcheetah==2.1.0 -pvorca==1.0.0 -pvporcupine==3.0.2 -pvrecorder==1.2.2 -pvspeaker==1.0.3 +picollm==1.2.4 +pvcheetah==2.1.1 +pvorca==1.0.1 +pvporcupine==3.0.4 +pvrecorder==1.2.4 +pvspeaker==1.0.4 windows-curses==2.4.0; sys_platform == 'win32' psutil==6.1.1; sys_platform == 'win32' \ No newline at end of file From c27580b946e3bfbc8052fdc2351a7b4621fbf58b Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 16 Jan 2025 09:08:59 -0800 Subject: [PATCH 2/8] updated lockfile --- recipes/llm-voice-assistant/nodejs/yarn.lock | 48 ++++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/recipes/llm-voice-assistant/nodejs/yarn.lock b/recipes/llm-voice-assistant/nodejs/yarn.lock index 10ff951..34040b3 100644 --- a/recipes/llm-voice-assistant/nodejs/yarn.lock +++ b/recipes/llm-voice-assistant/nodejs/yarn.lock @@ -74,35 +74,35 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@picovoice/cheetah-node@^2.1.0": - version "2.1.0" - resolved "https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.1.0.tgz#d68a86b55bc21bf586a23f4c33563dce58b18385" - integrity sha512-B63Aqmjs2berQ/YVEooIJTSQVXrJ/naz5YG5ZG+mlyfahYaOBbudXAdQP4FxTiQBlGLNhQSqZjllgoEs394n1Q== +"@picovoice/cheetah-node@^2.1.1": + version "2.1.1" + resolved 
"https://registry.yarnpkg.com/@picovoice/cheetah-node/-/cheetah-node-2.1.1.tgz#ecc1762c56b090670853b52def624574d4cca015" + integrity sha512-nP+VTZYVqHF3BNz8BIuP5MAFCFJVVX6HWAQq5DStZ5c26yCUQwXs+orIlXeEpNZqlZOEvhkxbwACqFwORC3Ktw== -"@picovoice/orca-node@^1.0.0": - version "1.0.0" - resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-1.0.0.tgz#812728c3183a914eff6b3189dfa958ef4d44f2f7" - integrity sha512-YDTqJ5KsueBC4Nj0Zo287VF+/y7SRjXbOyHy8h66joJYPF0QNsz8oDCzbQO7hzymNbkFXd0crMPK+gQElvd83w== +"@picovoice/orca-node@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-1.0.1.tgz#455132dfc5f95c0a651e47a512d4018860b7286d" + integrity sha512-IOinMTVrqsTXhwDvqPFnoi/phgr2w6tj5XJ/DFTp+jSuzbZQaMEAOIeKptSKERuGWYkbzXdXDPZb4WRLanFYtQ== -"@picovoice/picollm-node@1.2.3": - version "1.2.3" - resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.2.3.tgz#e987159a308ee44b5d947213ee4401d6ff30e6ed" - integrity sha512-WdtrmMbxAUS8SDr98VRG+LulGe8UvRclu0+bBnOnYdOfEG7+XWHzC1ddsXRyN9mdmpPJj5T0xxQ1uk9dN2j+aw== +"@picovoice/picollm-node@1.2.4": + version "1.2.4" + resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.2.4.tgz#f0dc0db5195737d731df99c91dde3e4ce1bc4ca5" + integrity sha512-UlJtMkn8Rl9QdPklGCngGAJaCMwnVQQRHucinZ33gplezRWCsgFRW8SdqE7D7EyK8bILJwc67Ngdtg19kyI08w== -"@picovoice/porcupine-node@^3.0.3": - version "3.0.4" - resolved "https://registry.yarnpkg.com/@picovoice/porcupine-node/-/porcupine-node-3.0.4.tgz#a4707810c99231d50f83e802907d2921b18c4941" - integrity sha512-cWFJSzQmEH45xVgfJ3z6S1pp0+0Tuki+7sBqamNa7K/s9j0ynW9WpJBeuMztRPrpwrDoAc7A2MZdK5UJ5wIebA== +"@picovoice/porcupine-node@^3.0.5": + version "3.0.5" + resolved "https://registry.yarnpkg.com/@picovoice/porcupine-node/-/porcupine-node-3.0.5.tgz#d23af28b5a9cccc87c4f8c1ad18f83a96cc679f5" + integrity sha512-1N3Pf9eeLBP3XQcPgjNUDmUKxD7Wso65XtgpSnUiiCRvHhBD9GehfrJObx7nwFSazC6y/1uNWBnOJ32FKudPhQ== -"@picovoice/pvrecorder-node@^1.2.3": - version "1.2.4" - resolved "https://registry.yarnpkg.com/@picovoice/pvrecorder-node/-/pvrecorder-node-1.2.4.tgz#1e67a1f82a144ad3c8e3f77c18fdbfe1ab5880cf" - integrity sha512-s8l6LtJnTHZ+FfIgXJZ9d8pKONSWs04v5q83F2zmfRr9IV1m7SQ5RlsmL0FO7NsB0GjIar3qHndryAQCjgSInw== +"@picovoice/pvrecorder-node@^1.2.5": + version "1.2.5" + resolved "https://registry.yarnpkg.com/@picovoice/pvrecorder-node/-/pvrecorder-node-1.2.5.tgz#a47e11d347979ef0f1b083657ff69dd9bd2efe5c" + integrity sha512-bnid5oInf22JRdrZ75z8ooewOza0whYI9w/oSQSZbkDvddylPPrY6x+1L1qIyf5Tb8ZtzKQL+aQ9m8SazgRHFg== -"@picovoice/pvspeaker-node@^1.0.1": - version "1.0.1" - resolved "https://registry.yarnpkg.com/@picovoice/pvspeaker-node/-/pvspeaker-node-1.0.1.tgz#0524c9509a88eb50f3975cbe3c5012058a54619b" - integrity sha512-4/QBYcp3GZ+2ewq2QuyCn7pnqXdfwj90qbINecKtBsCXKLtl/XHLmXOqgfoy+RBSXmA5wjV/LuOHvqVibrxAuQ== +"@picovoice/pvspeaker-node@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@picovoice/pvspeaker-node/-/pvspeaker-node-1.0.2.tgz#5603864038ba7f2a8ce7f82507f68f4e3f1568b8" + integrity sha512-x7MTp6pdon7Dce1lp0yiM8wQcKkYt9jyLqiK2hjUuq85vGlILBpUfKCspuEUVZnyG/5tMic6VsGfXgET7n+O7A== "@types/json-schema@^7.0.9": version "7.0.15" From c0b1655bdee922eb5f80df939bdeecc0eda9ce60 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 16 Jan 2025 13:19:24 -0800 Subject: [PATCH 3/8] switch from curses to VT100 --- .../python/windows_gui/main.py | 352 +++++++++++------- 1 file changed, 221 insertions(+), 131 deletions(-) diff --git 
a/recipes/llm-voice-assistant/python/windows_gui/main.py b/recipes/llm-voice-assistant/python/windows_gui/main.py index fa5af77..22929e2 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/main.py +++ b/recipes/llm-voice-assistant/python/windows_gui/main.py @@ -1,4 +1,3 @@ -import curses import json import math import os @@ -150,8 +149,11 @@ def __init__( self.orca_process = orca_process def close(self): - self.orca_connection.send({'command': Commands.CLOSE}) - self.orca_process.join() + try: + self.orca_connection.send({'command': Commands.CLOSE}) + self.orca_process.join(1.0) + except: + self.orca_process.kill() def start(self): self.speaker.start() @@ -164,10 +166,13 @@ def flush(self): self.orca_connection.send({'command': Commands.FLUSH}) def interrupt(self): - self.orca_connection.send({'command': Commands.INTERRUPT}) - while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.01) - self.speaker.interrupt() + try: + self.orca_connection.send({'command': Commands.INTERRUPT}) + while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: + time.sleep(0.1) + self.speaker.interrupt() + except: + pass def tick(self): while self.orca_connection.poll(): @@ -204,6 +209,10 @@ def handler(_, __) -> None: message = connection.recv() if message['command'] == Commands.CLOSE: close = True + synthesizing = False + flushing = False + while not text_queue.empty(): + text_queue.get() elif message['command'] == Commands.START: synthesizing = True elif message['command'] == Commands.PROCESS: @@ -249,8 +258,11 @@ def __init__( self.pllm_process = pllm_process def close(self): - self.pllm_connection.send({'command': Commands.CLOSE}) - self.pllm_process.join() + try: + self.pllm_connection.send({'command': Commands.CLOSE}) + self.pllm_process.join(1.0) + except: + self.pllm_process.kill() def process(self, text: str): self.synthesizer.start() @@ -259,7 +271,7 @@ def process(self, text: str): def interrupt(self): self.pllm_connection.send({'command': Commands.INTERRUPT}) while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.01) + time.sleep(0.1) self.synthesizer.interrupt() def tick(self): @@ -340,6 +352,7 @@ def llm_task(text): message = connection.recv() if message['command'] == Commands.CLOSE: close = True + pllm.interrupt() elif message['command'] == Commands.PROCESS: generating = True text = message['text'] @@ -362,8 +375,6 @@ def llm_task(text): interrupting = False connection.send({'command': Commands.INTERRUPT}) finally: - while llm_future and llm_future.done(): - time.sleep(0.01) del executor pllm.release() @@ -439,6 +450,100 @@ def tick(self): self.queue.put({'command': Commands.PCM_IN, 'pcm': pcm, 'sample-rate': self.recorder.sample_rate}) +class Window: + @staticmethod + def reset(): + os.system('cls' if os.name == 'nt' else 'clear') + + @staticmethod + def goto(y, x): + return f"\u001B[{y+1};{x+1}H" + + @staticmethod + def color(col): + return f"\u001B[{';'.join([str(arg) for arg in col])}m" + + @staticmethod + def present(): + sys.stdout.flush() + + def __init__(self, height, width, y = 0, x = 0): + self.height = height + self.width = width + self.y = y + self.x = x + + def subwin(self, height, width, y, x): + return Window(height, width, self.y + y, self.x + x) + + def clear(self): + display = ' ' * self.width + sys.stdout.write(Window.color([0])) + for i in range(self.height): + sys.stdout.write(Window.goto(self.y + i, self.x)) 
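+            # overwrite row i with spaces to blank it before redrawing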
+ sys.stdout.write(display) + + def write(self, y, x, *args): + sys.stdout.write(Window.goto(self.y + y, self.x + x)) + sys.stdout.write(Window.color([0])) + for text in args: + sys.stdout.write(text) + + def box(self): + TOP = '┌' + '─' * (self.width - 2) + '┐' + ROW = '│' + ' ' * (self.width - 2) + '│' + BOTTOM = '└' + '─' * (self.width - 2) + '┘' + sys.stdout.write(Window.color([0])) + sys.stdout.write(Window.goto(self.y, self.x) + TOP) + for i in range(1, self.height - 1): + sys.stdout.write(Window.goto(self.y + i, self.x) + ROW) + sys.stdout.write(Window.goto(self.y + self.height - 1, self.x) + BOTTOM) + + +class VerticalBar: + def __init__(self, window: Window, title: str, color: list = [0]): + self.window = window + self.title = title + self.color = color + self.prev = None + + def set_title(self, title: str): + self.title = title + self.window.write(1, 1, self.title.center(self.window.width - 2)) + + def update(self, value): + current = round(value * (self.window.height - 4)) + display = '▄' * (self.window.width - 4) + + if self.prev != current: + self.prev = current + self.window.box() + self.window.write(1, 1, self.title.center(self.window.width - 2)) + for i in range(current): + self.window.write(self.window.height - i - 2, 2, Window.color(self.color), display) + + +class HorizontalBar: + def __init__(self, window: Window, title: str): + self.window = window + self.title = title + self.prev = None + + def update(self, value, text): + current = (round(value * (self.window.width - 4)), text) + display0 = '▖' * current[0] + display1 = '▌' * current[0] + + if self.prev != current: + self.prev = current + self.window.box() + self.window.write(1, 2, self.title.ljust(12) + text.rjust(self.window.width - 16)) + self.window.write(1, self.window.width) + self.window.write(2, 2, display0) + for i in range(3, self.window.height - 1): + self.window.write(i, 2, display1) + + class Display: def __init__(self, queue: Queue, config): self.queue = queue @@ -447,13 +552,26 @@ def __init__(self, queue: Queue, config): self.current_time = time.time() self.model_name = None - self.screen = curses.initscr() - self.height, self.width = self.screen.getmaxyx() - - if self.height < 30 or self.width < 120: - print(f'Error: Console window not large enough was ({self.height}, {self.width}) needs (30, 120)') + width, height = os.get_terminal_size() + if height < 30 or width < 120: + print(f'Error: Console window not large enough was ({height}, {width}) needs (30, 120)') exit(1) + self.prompt_text = [ + 'Loading...', + 'Say `Jarvis`', + 'Ask a Question', + 'Say `Jarvis` to Interrupt' + ] + + self.title_text = [ + '', + '░█▀█░▀█▀░█▀▀░█▀█░█░█░█▀█░▀█▀░█▀▀░█▀▀░', + '░█▀▀░░█░░█░░░█░█░▀▄▀░█░█░░█░░█░░░█▀▀░', + '░▀░░░▀▀▀░▀▀▀░▀▀▀░░▀░░▀▀▀░▀▀▀░▀▀▀░▀▀▀░', + '' + ] + self.last_blink = 0.0 self.in_blink = False self.text_state = 0 @@ -467,30 +585,31 @@ def __init__(self, queue: Queue, config): self.volume_out = [0.0] * 12 self.volume_index_out = 0 - curses.curs_set(0) - curses.start_color() - curses.use_default_colors() - curses.init_color(128, 500, 500, 500) - curses.init_color(129, 215, 489, 999) - curses.init_color(130, 215, 999, 489) - curses.init_pair(1, 128, curses.COLOR_BLACK) - curses.init_pair(2, 129, curses.COLOR_BLACK) - curses.init_pair(3, 130, curses.COLOR_BLACK) - - self.window = curses.newwin(self.height, self.width) - self.prompt = self.window.subwin(1, self.width - 2, self.height - 2, 1) - self.pcm_in = self.window.subwin(self.height - 10, 20, 7, 2) - self.pcm_out = self.window.subwin(self.height - 10, 20, 
7, 23) + Window.reset() + self.screen = Window(height, width, 0, 0) + self.title = self.screen.subwin(6, self.screen.width - 4, 1, 2) + self.prompt = self.screen.subwin(1, self.screen.width - 2, self.screen.height - 2, 1) + self.pcm_in = VerticalBar(self.screen.subwin(self.screen.height - 10, 20, 7, 2), 'You', [38, 2, 55, 255, 125]) + self.pcm_out = VerticalBar(self.screen.subwin(self.screen.height - 10, 20, 7, 23), 'AI', [38, 2, 55, 125, 255]) self.usage = { - 'CPU': self.window.subwin(6, self.width - 47, 7, 45), - 'GPU': self.window.subwin(6, self.width - 47, 14, 45), - 'RAM': self.window.subwin(6, self.width - 47, 21, 45), + 'CPU': HorizontalBar(self.screen.subwin(6, self.screen.width - 47, 7, 45), 'CPU'), + 'GPU': HorizontalBar(self.screen.subwin(6, self.screen.width - 47, 14, 45), 'GPU'), + 'RAM': HorizontalBar(self.screen.subwin(6, self.screen.width - 47, 21, 45), 'RAM'), } - for key in self.usage: - self.usage[key].box() - self.usage[key].addstr(1, 2, key) + self.screen.box() + self.render_title() + self.render_prompt(0) + + self.pcm_in.update(0) + self.pcm_out.update(0) + self.usage['CPU'].update(0, '') + self.usage['GPU'].update(0, '') + self.usage['RAM'].update(0, '') + + self.title.write(0, 0) + Window.present() def start(self, pids: list): self.should_close = Event() @@ -505,20 +624,20 @@ def start(self, pids: list): def close(self): self.should_close.set() for process in self.processes: - process.join() - curses.endwin() + process.join(1.0) + Window.reset() - def render_prompt(self): - text_states = [ - 'Loading...', - 'Say `Jarvis`', - 'Ask a Question', - 'Say `Jarvis` to Interrupt' - ] + def render_title(self): + for i, line in enumerate(self.title_text): + display = line.center(self.title.width, '░') + self.title.write(i, 0, display) + + def render_prompt(self, text_state = None): + if text_state: + self.text_state = text_state self.prompt.clear() - self.prompt.addstr(0, 3, text_states[self.text_state]) - self.prompt.addch(0, 1, '>', curses.color_pair(1) if self.in_blink else 0) + self.prompt.write(0, 1, Window.color([90]) if self.in_blink else '', '> ', Window.color([0]), self.prompt_text[self.text_state]) def tick(self): self.prev_time = self.current_time @@ -528,8 +647,7 @@ def tick(self): while not self.queue.empty(): message = self.queue.get() if message['command'] == Commands.TEXT_STATE: - self.text_state = int(message['state']) - self.render_prompt() + self.render_prompt(int(message['state'])) elif message['command'] == Commands.PCM_IN: self.samples_in = message['pcm'] self.sample_rate_in = message['sample-rate'] @@ -541,19 +659,11 @@ def tick(self): elif message['command'] == Commands.USAGE: name = message['name'] text = message['text'] - bar = message['bar'] - height, width = self.usage[name].getmaxyx() - bar_width = round((width - 4) * max(0, min(1, bar))) - self.usage[name].clear() - self.usage[name].box() - text0 = f'{text}'.rjust(width - 12) - self.usage[name].addstr(1, 2, f'{name:<8}{text0}') - for j in range(height - 3): - for i in range(bar_width): - self.usage[name].addch(2 + j, 2 + i, '▖' if j == 0 else '▌') - self.usage[name].refresh() + bar = max(0, min(1, message['bar'])) + self.usage[name].update(bar, text) elif message['command'] == Commands.MODEL_NAME: - self.model_name = message['name'] + if message['name'] and len(message['name']) < 18: + self.pcm_out.set_title(message['name']) if self.current_time > self.last_blink + 0.5: self.last_blink = self.current_time @@ -593,37 +703,11 @@ def compute_amplitude(samples, sample_max=32768, scale=1.0): 
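        # average the ring buffers of recent frame amplitudes to smooth the VU meters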
volume_in = sum(self.volume_in) / len(self.volume_in) volume_out = sum(self.volume_out) / len(self.volume_out) - self.pcm_in.clear() - self.pcm_out.clear() - self.pcm_in.box() - self.pcm_out.box() - height_in, width_in = self.pcm_in.getmaxyx() - height_out, width_out = self.pcm_out.getmaxyx() - self.pcm_in.addstr(1, 1, 'You'.center(18)) - model_name = f'{self.model_name}' if self.model_name and len(self.model_name) < 18 else 'AI' - self.pcm_out.addstr(1, 1, model_name.center(18)) - for j in range(width_in - 4): - for i in range(int(volume_in * (height_in - 4))): - self.pcm_in.addch(height_in - 2 - i, 2 + j, '▄', curses.color_pair(3)) - for j in range(width_out - 4): - for i in range(int(volume_out * (height_out - 4))): - self.pcm_out.addch(height_out - 2 - i, 2 + j, '▄', curses.color_pair(2)) - - title_text = [ - '', - '░█▀█░▀█▀░█▀▀░█▀█░█░█░█▀█░▀█▀░█▀▀░█▀▀░', - '░█▀▀░░█░░█░░░█░█░▀▄▀░█░█░░█░░█░░░█▀▀░', - '░▀░░░▀▀▀░▀▀▀░▀▀▀░░▀░░▀▀▀░▀▀▀░▀▀▀░▀▀▀░', - '' - ] + self.pcm_in.update(volume_in) + self.pcm_out.update(volume_out) - self.title = self.window.subwin(6, self.width - 4, 1, 2) - for i, line in enumerate(title_text): - display = line.center(self.width - 4, '░') - self.title.addstr(i, 0, display) - - self.window.box() - self.window.refresh() + self.title.write(0, 0) + Window.present() @staticmethod def run_command(command): @@ -639,14 +723,17 @@ def handler(_, __) -> None: pass signal.signal(signal.SIGINT, handler) - while not should_close.is_set(): - cpu_usage = sum([psutil.Process(pid).cpu_percent(0.25) for pid in pids]) / psutil.cpu_count() - queue.put({ - 'command': Commands.USAGE, - 'name': 'CPU', - 'text': f"{math.ceil(cpu_usage)}%", - 'bar': (cpu_usage / 100) - }) + try: + while not should_close.is_set(): + cpu_usage = sum([psutil.Process(pid).cpu_percent(0.25) for pid in pids]) / psutil.cpu_count() + queue.put({ + 'command': Commands.USAGE, + 'name': 'CPU', + 'text': f"{math.ceil(cpu_usage)}%", + 'bar': (cpu_usage / 100) + }) + except: + pass @staticmethod def worker_gpu(queue: Queue, should_close, pids: list): @@ -654,19 +741,22 @@ def handler(_, __) -> None: pass signal.signal(signal.SIGINT, handler) - gpu_usage_counters = ', '.join([r'"\GPU Engine(pid_{}_*)\Utilization Percentage"'.format(pid) for pid in pids]) - gpu_usage_cmd = r'(((Get-Counter {}).CounterSamples | where CookedValue).CookedValue | measure -sum).sum' - gpu_usage_cmd = gpu_usage_cmd.format(gpu_usage_counters) - while not should_close.is_set(): - gpu_usage = Display.run_command(gpu_usage_cmd) - if gpu_usage is not None: - gpu_usage = max(0, min(100, gpu_usage)) - queue.put({ - 'command': Commands.USAGE, - 'name': 'GPU', - 'text': f"{math.ceil(gpu_usage)}%", - 'bar': (float(gpu_usage) / 100) - }) + try: + gpu_usage_counters = ', '.join([r'"\GPU Engine(pid_{}_*)\Utilization Percentage"'.format(pid) for pid in pids]) + gpu_usage_cmd = r'(((Get-Counter {}).CounterSamples | where CookedValue).CookedValue | measure -sum).sum' + gpu_usage_cmd = gpu_usage_cmd.format(gpu_usage_counters) + while not should_close.is_set(): + gpu_usage = Display.run_command(gpu_usage_cmd) + if gpu_usage is not None: + gpu_usage = max(0, min(100, gpu_usage)) + queue.put({ + 'command': Commands.USAGE, + 'name': 'GPU', + 'text': f"{math.ceil(gpu_usage)}%", + 'bar': (float(gpu_usage) / 100) + }) + except: + pass @staticmethod def worker_ram(queue: Queue, should_close, pids: list): @@ -674,17 +764,20 @@ def handler(_, __) -> None: pass signal.signal(signal.SIGINT, handler) - ram_total = psutil.virtual_memory().total / 1024 / 1024 / 1024 - while not 
should_close.is_set(): - time.sleep(0.25) - ram_usage = sum([psutil.Process(pid).memory_info().rss for pid in pids]) / 1024 / 1024 / 1024 - if ram_usage is not None: - queue.put({ - 'command': Commands.USAGE, - 'name': 'RAM', - 'text': f"{round(ram_usage, 2)}GB / {round(ram_total, 2)}GB", - 'bar': (float(ram_usage) / float(ram_total)) - }) + try: + ram_total = psutil.virtual_memory().total / 1024 / 1024 / 1024 + while not should_close.is_set(): + time.sleep(0.25) + ram_usage = sum([psutil.Process(pid).memory_info().rss for pid in pids]) / 1024 / 1024 / 1024 + if ram_usage is not None: + queue.put({ + 'command': Commands.USAGE, + 'name': 'RAM', + 'text': f"{round(ram_usage, 2)}GB / {round(ram_total, 2)}GB", + 'bar': (float(ram_usage) / float(ram_total)) + }) + except: + pass def main(config): @@ -732,18 +825,15 @@ def handler(_, __) -> None: try: while not stop[0]: + if not pllm_process.is_alive() or not orca_process.is_alive(): + break + recorder.tick() generator.tick() synthesizer.tick() speaker.tick() display.tick() finally: - generator.interrupt() - generator.tick() - synthesizer.tick() - speaker.tick() - display.tick() - display.close() recorder.close() listener.close() @@ -752,7 +842,7 @@ def handler(_, __) -> None: speaker.close() for child in active_children(): - child.terminate() + child.kill() porcupine.delete() cheetah.delete() From b6ea59b4a3a95937e55080434fff9928b2c1b73f Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Thu, 16 Jan 2025 13:48:13 -0800 Subject: [PATCH 4/8] fixed codestyle --- .../python/windows_gui/main.py | 47 ++++++++++--------- .../python/windows_gui/requirements.txt | 3 +- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/recipes/llm-voice-assistant/python/windows_gui/main.py b/recipes/llm-voice-assistant/python/windows_gui/main.py index 22929e2..903a221 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/main.py +++ b/recipes/llm-voice-assistant/python/windows_gui/main.py @@ -152,7 +152,8 @@ def close(self): try: self.orca_connection.send({'command': Commands.CLOSE}) self.orca_process.join(1.0) - except: + except Exception as e: + sys.stderr.write(str(e)) self.orca_process.kill() def start(self): @@ -171,8 +172,8 @@ def interrupt(self): while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: time.sleep(0.1) self.speaker.interrupt() - except: - pass + except Exception as e: + sys.stderr.write(str(e)) def tick(self): while self.orca_connection.poll(): @@ -261,7 +262,8 @@ def close(self): try: self.pllm_connection.send({'command': Commands.CLOSE}) self.pllm_process.join(1.0) - except: + except Exception as e: + sys.stderr.write(str(e)) self.pllm_process.kill() def process(self, text: str): @@ -457,7 +459,7 @@ def reset(): @staticmethod def goto(y, x): - return f"\u001B[{y+1};{x+1}H" + return f"\u001B[{y + 1};{x + 1}H" @staticmethod def color(col): @@ -467,7 +469,7 @@ def color(col): def present(): sys.stdout.flush() - def __init__(self, height, width, y = 0, x = 0): + def __init__(self, height, width, y=0, x=0): self.height = height self.width = width self.y = y @@ -490,14 +492,14 @@ def write(self, y, x, *args): sys.stdout.write(text) def box(self): - TOP = '┌' + '─' * (self.width - 2) + '┐' - ROW = '│' + ' ' * (self.width - 2) + '│' - BOTTOM = '└' + '─' * (self.width - 2) + '┘' + top = '┌' + '─' * (self.width - 2) + '┐' + row = '│' + ' ' * (self.width - 2) + '│' + bottom = '└' + '─' * (self.width - 2) + '┘' sys.stdout.write(Window.color([0])) - sys.stdout.write(Window.goto(self.y, self.x) + 
TOP) + sys.stdout.write(Window.goto(self.y, self.x) + top) for i in range(1, self.height - 1): - sys.stdout.write(Window.goto(self.y + i, self.x) + ROW) - sys.stdout.write(Window.goto(self.y + self.height - 1, self.x) + BOTTOM) + sys.stdout.write(Window.goto(self.y + i, self.x) + row) + sys.stdout.write(Window.goto(self.y + self.height - 1, self.x) + bottom) class VerticalBar: @@ -632,12 +634,14 @@ def render_title(self): display = line.center(self.title.width, '░') self.title.write(i, 0, display) - def render_prompt(self, text_state = None): + def render_prompt(self, text_state=None): if text_state: self.text_state = text_state self.prompt.clear() - self.prompt.write(0, 1, Window.color([90]) if self.in_blink else '', '> ', Window.color([0]), self.prompt_text[self.text_state]) + self.prompt.write(0, 1, + Window.color([90]) if self.in_blink else '', '> ', + Window.color([0]), self.prompt_text[self.text_state]) def tick(self): self.prev_time = self.current_time @@ -732,8 +736,8 @@ def handler(_, __) -> None: 'text': f"{math.ceil(cpu_usage)}%", 'bar': (cpu_usage / 100) }) - except: - pass + except Exception as e: + sys.stderr.write(str(e)) @staticmethod def worker_gpu(queue: Queue, should_close, pids: list): @@ -742,7 +746,8 @@ def handler(_, __) -> None: signal.signal(signal.SIGINT, handler) try: - gpu_usage_counters = ', '.join([r'"\GPU Engine(pid_{}_*)\Utilization Percentage"'.format(pid) for pid in pids]) + gpu_usage_counters_format = r'"\GPU Engine(pid_{}_*)\Utilization Percentage"' + gpu_usage_counters = ', '.join([gpu_usage_counters_format.format(pid) for pid in pids]) gpu_usage_cmd = r'(((Get-Counter {}).CounterSamples | where CookedValue).CookedValue | measure -sum).sum' gpu_usage_cmd = gpu_usage_cmd.format(gpu_usage_counters) while not should_close.is_set(): @@ -755,8 +760,8 @@ def handler(_, __) -> None: 'text': f"{math.ceil(gpu_usage)}%", 'bar': (float(gpu_usage) / 100) }) - except: - pass + except Exception as e: + sys.stderr.write(str(e)) @staticmethod def worker_ram(queue: Queue, should_close, pids: list): @@ -776,8 +781,8 @@ def handler(_, __) -> None: 'text': f"{round(ram_usage, 2)}GB / {round(ram_total, 2)}GB", 'bar': (float(ram_usage) / float(ram_total)) }) - except: - pass + except Exception as e: + sys.stderr.write(str(e)) def main(config): diff --git a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt index 1e9385f..1d97f18 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/requirements.txt +++ b/recipes/llm-voice-assistant/python/windows_gui/requirements.txt @@ -4,5 +4,4 @@ pvorca==1.0.1 pvporcupine==3.0.4 pvrecorder==1.2.4 pvspeaker==1.0.4 -windows-curses==2.4.0; sys_platform == 'win32' -psutil==6.1.1; sys_platform == 'win32' \ No newline at end of file +psutil==6.1.1 \ No newline at end of file From 25bb54b76393d4025dc00e228de66aebd2981a0a Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 17 Jan 2025 09:38:39 -0800 Subject: [PATCH 5/8] minor performance improvements --- recipes/llm-voice-assistant/python/cli/main.py | 10 ++++++---- recipes/llm-voice-assistant/python/windows_gui/main.py | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py index 4be536e..0eb737d 100644 --- a/recipes/llm-voice-assistant/python/cli/main.py +++ b/recipes/llm-voice-assistant/python/cli/main.py @@ -203,7 +203,7 @@ def flush(self): def interrupt(self): 
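        # drain the worker's acknowledgement before cutting audio (this ack wait is dropped in patch 8)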
self.orca_connection.send({'command': Commands.INTERRUPT}) while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.01) + time.sleep(0.1) self.speaker.interrupt() def tick(self): @@ -248,6 +248,7 @@ def handler(_, __) -> None: text_queue = Queue() while not close: while connection.poll(): + time.sleep(0.1) message = connection.recv() if message['command'] == Commands.CLOSE: close = True @@ -269,7 +270,7 @@ def handler(_, __) -> None: orca_profiler.reset() utterance_end_sec = 0 delay_sec = -1 - if not text_queue.empty(): + while not text_queue.empty(): text = text_queue.get() orca_profiler.tick() pcm = orca_stream.synthesize(text) @@ -321,7 +322,7 @@ def process(self, text: str, utterance_end_sec): def interrupt(self): self.pllm_connection.send({'command': Commands.INTERRUPT}) while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.01) + time.sleep(0.1) print('', flush=True) self.synthesizer.interrupt() @@ -406,6 +407,7 @@ def llm_task(text): llm_future = None interrupting = False while not close: + time.sleep(0.1) while connection.poll(): message = connection.recv() if message['command'] == Commands.CLOSE: @@ -434,7 +436,7 @@ def llm_task(text): connection.send({'command': Commands.INTERRUPT}) finally: while llm_future and llm_future.done(): - time.sleep(0.01) + time.sleep(0.1) del executor pllm.release() diff --git a/recipes/llm-voice-assistant/python/windows_gui/main.py b/recipes/llm-voice-assistant/python/windows_gui/main.py index 903a221..4eaed84 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/main.py +++ b/recipes/llm-voice-assistant/python/windows_gui/main.py @@ -206,6 +206,7 @@ def handler(_, __) -> None: flushing = False text_queue = Queue() while not close: + time.sleep(0.1) while connection.poll(): message = connection.recv() if message['command'] == Commands.CLOSE: @@ -350,6 +351,7 @@ def llm_task(text): llm_future = None interrupting = False while not close: + time.sleep(0.1) while connection.poll(): message = connection.recv() if message['command'] == Commands.CLOSE: @@ -745,6 +747,9 @@ def handler(_, __) -> None: pass signal.signal(signal.SIGINT, handler) + if not sys.platform.lower().startswith('win'): + return + try: gpu_usage_counters_format = r'"\GPU Engine(pid_{}_*)\Utilization Percentage"' gpu_usage_counters = ', '.join([gpu_usage_counters_format.format(pid) for pid in pids]) @@ -856,10 +861,6 @@ def handler(_, __) -> None: if __name__ == '__main__': - if not sys.platform.lower().startswith('win'): - print('Error: Only runs on Windows platforms') - exit(1) - parser = ArgumentParser() parser.add_argument( '--config', From 6c69a66f42eadd2d515867bdf4c0241553c7d4e7 Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 17 Jan 2025 12:27:56 -0800 Subject: [PATCH 6/8] minor improvements --- .../python/windows_gui/main.py | 124 ++++++++---------- 1 file changed, 52 insertions(+), 72 deletions(-) diff --git a/recipes/llm-voice-assistant/python/windows_gui/main.py b/recipes/llm-voice-assistant/python/windows_gui/main.py index 4eaed84..27054ef 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/main.py +++ b/recipes/llm-voice-assistant/python/windows_gui/main.py @@ -7,10 +7,10 @@ import sys import time from argparse import ArgumentParser -from concurrent.futures import ThreadPoolExecutor from itertools import chain from multiprocessing import Event, Pipe, Process, Queue, active_children from multiprocessing.connection import Connection +from 
threading import Thread from typing import Optional, Sequence @@ -87,11 +87,10 @@ def __init__( self.speaking = False self.flushing = False self.pcmBuffer = [] - self.executor = ThreadPoolExecutor() self.future = None def close(self): - self.executor.shutdown() + self.interrupt() def start(self): self.started = True @@ -115,6 +114,7 @@ def tick(self): def stop(): self.speaker.flush() self.speaker.stop() + self.queue.put({'command': Commands.TEXT_STATE, 'state': 1}) if not self.speaking and len(self.pcmBuffer) > self.orca_warmup: self.speaking = True self.speaker.start() @@ -130,10 +130,7 @@ def stop(): self.started = False self.speaking = False self.flushing = False - self.future = self.executor.submit(stop) - if self.future and self.future.done(): - self.future = None - self.queue.put({'command': Commands.TEXT_STATE, 'state': 1}) + Thread(target=stop).start() class Synthesizer: @@ -169,8 +166,6 @@ def flush(self): def interrupt(self): try: self.orca_connection.send({'command': Commands.INTERRUPT}) - while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.1) self.speaker.interrupt() except Exception as e: sys.stderr.write(str(e)) @@ -228,19 +223,19 @@ def handler(_, __) -> None: while not text_queue.empty(): text_queue.get() orca_stream.flush() - connection.send({'command': Commands.INTERRUPT}) - if not text_queue.empty(): + while not text_queue.empty(): text = text_queue.get() - pcm = orca_stream.synthesize(text) - if pcm is not None: - connection.send({'command': Commands.SPEAK, 'pcm': pcm}) + if synthesizing: + pcm = orca_stream.synthesize(text) + if pcm is not None: + connection.send({'command': Commands.SPEAK, 'pcm': pcm}) if synthesizing and flushing and text_queue.empty(): synthesizing = False flushing = False pcm = orca_stream.flush() connection.send({'command': Commands.SPEAK, 'pcm': pcm}) connection.send({'command': Commands.FLUSH}) - elif flushing: + elif not synthesizing and flushing and text_queue.empty(): flushing = False finally: orca_stream.close() @@ -273,8 +268,6 @@ def process(self, text: str): def interrupt(self): self.pllm_connection.send({'command': Commands.INTERRUPT}) - while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.1) self.synthesizer.interrupt() def tick(self): @@ -308,7 +301,6 @@ def handler(_, __) -> None: dialog = pllm.get_dialog(system=config['picollm_system_prompt']) else: dialog = pllm.get_dialog() - generating = False connection.send({'command': Commands.MODEL_NAME, 'name': pllm.model.split(' ')[0]}) @@ -322,64 +314,52 @@ def handler(_, __) -> None: completion = CompletionText(stop_phrases) def llm_callback(text): - if generating: - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0: - connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens}) - - def llm_task(text): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. 
{text}" if config['short_answers'] else text) - - completion.reset() - return pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=config['picollm_completion_token_limit'], - stop_phrases=stop_phrases, - presence_penalty=config['picollm_presence_penalty'], - frequency_penalty=config['picollm_frequency_penalty'], - temperature=config['picollm_temperature'], - top_p=config['picollm_top_p'], - stream_callback=llm_callback) + completion.append(text) + new_tokens = completion.get_new_tokens() + if len(new_tokens) > 0: + connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens}) + + close = [False] + prompt = [None] + def event_manager(): + while not close[0]: + message = connection.recv() + if message['command'] == Commands.CLOSE: + close[0] = True + pllm.interrupt() + return + elif message['command'] == Commands.INTERRUPT: + pllm.interrupt() + elif message['command'] == Commands.PROCESS: + prompt[0] = message['text'] + Thread(target=event_manager).start() try: - close = False - executor = ThreadPoolExecutor() - llm_future = None - interrupting = False - while not close: - time.sleep(0.1) - while connection.poll(): - message = connection.recv() - if message['command'] == Commands.CLOSE: - close = True - pllm.interrupt() - elif message['command'] == Commands.PROCESS: - generating = True - text = message['text'] - llm_future = executor.submit(llm_task, text) - elif message['command'] == Commands.INTERRUPT: - interrupting = True - generating = False - pllm.interrupt() - if llm_future and llm_future.done(): - generating = False - llm_result = llm_future.result() - dialog.add_llm_response(llm_result.completion) - if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - interrupting = False - connection.send({'command': Commands.INTERRUPT}) - else: + while not close[0]: + if prompt[0] is not None: + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" + dialog.add_human_request( + f"{short_answers_instruction}. 
{prompt[0]}" if config['short_answers'] else prompt[0]) + prompt[0] = None + + completion.reset() + result = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=config['picollm_completion_token_limit'], + stop_phrases=stop_phrases, + presence_penalty=config['picollm_presence_penalty'], + frequency_penalty=config['picollm_frequency_penalty'], + temperature=config['picollm_temperature'], + top_p=config['picollm_top_p'], + stream_callback=llm_callback) + + dialog.add_llm_response(result.completion) + if result.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: connection.send({'command': Commands.FLUSH}) - llm_future = None - if not llm_future and interrupting: - interrupting = False - connection.send({'command': Commands.INTERRUPT}) + else: + time.sleep(0.25) finally: - del executor pllm.release() From bbc3b92ad6592967692bbb90f8744a80fce7189e Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 17 Jan 2025 13:01:15 -0800 Subject: [PATCH 7/8] fixed codestyle --- recipes/llm-voice-assistant/python/windows_gui/main.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/recipes/llm-voice-assistant/python/windows_gui/main.py b/recipes/llm-voice-assistant/python/windows_gui/main.py index 27054ef..8d9c920 100644 --- a/recipes/llm-voice-assistant/python/windows_gui/main.py +++ b/recipes/llm-voice-assistant/python/windows_gui/main.py @@ -192,7 +192,7 @@ def handler(_, __) -> None: signal.signal(signal.SIGINT, handler) orca = pvorca.create(access_key=config['access_key']) - orca_stream = orca.stream_open() + orca_stream = orca.stream_open(speech_rate=config['orca_speech_rate']) connection.send(orca.sample_rate) try: @@ -321,6 +321,7 @@ def llm_callback(text): close = [False] prompt = [None] + def event_manager(): while not close[0]: message = connection.recv() @@ -852,7 +853,7 @@ def handler(_, __) -> None: '--picollm_model_path', help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( - '--keyword-model_path', + '--keyword_model_path', help='Absolute path to the keyword model file (`.ppn`). If not set, `Jarvis` will be the wake phrase') parser.add_argument( '--cheetah_endpoint_duration_sec', @@ -904,6 +905,10 @@ def handler(_, __) -> None: type=float, help="Duration of the synthesized audio to buffer before streaming it out. 
A higher value helps slower " "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") + parser.add_argument( + '--orca_speech_rate', + type=float, + help="Rate of speech of the generated audio.") parser.add_argument( '--porcupine_sensitivity', type=float, @@ -941,6 +946,7 @@ def handler(_, __) -> None: 'picollm_top_p': 1, 'picollm_system_prompt': None, 'orca_warmup_sec': 0, + 'orca_speech_rate': 1.0, 'porcupine_sensitivity': 0.5, 'short_answers': False } From 5564bc93dea3fc852567adbe7719406887ee980a Mon Sep 17 00:00:00 2001 From: Matthew Maxwell Date: Fri, 17 Jan 2025 14:17:02 -0800 Subject: [PATCH 8/8] updated cli demo to match windows_gui --- .../llm-voice-assistant/python/cli/main.py | 183 +++++++++--------- 1 file changed, 90 insertions(+), 93 deletions(-) diff --git a/recipes/llm-voice-assistant/python/cli/main.py b/recipes/llm-voice-assistant/python/cli/main.py index 0eb737d..909453c 100644 --- a/recipes/llm-voice-assistant/python/cli/main.py +++ b/recipes/llm-voice-assistant/python/cli/main.py @@ -4,10 +4,10 @@ import sys import time from argparse import ArgumentParser -from concurrent.futures import ThreadPoolExecutor from itertools import chain from multiprocessing import Event, Pipe, Process, Queue, active_children from multiprocessing.connection import Connection +from threading import Thread from typing import Optional, Sequence @@ -128,11 +128,10 @@ def __init__( self.speaking = False self.flushing = False self.pcmBuffer = [] - self.executor = ThreadPoolExecutor() self.future = None def close(self): - self.executor.shutdown() + self.interrupt() def start(self): self.started = True @@ -156,6 +155,8 @@ def tick(self): def stop(): self.speaker.flush() self.speaker.stop() + ppn_prompt = self.config['ppn_prompt'] + print(f'$ Say {ppn_prompt} ...', flush=True) if not self.speaking and len(self.pcmBuffer) > self.orca_warmup: self.speaking = True self.speaker.start() @@ -167,11 +168,7 @@ def stop(): self.started = False self.speaking = False self.flushing = False - self.future = self.executor.submit(stop) - if self.future and self.future.done(): - self.future = None - ppn_prompt = self.config['ppn_prompt'] - print(f'$ Say {ppn_prompt} ...', flush=True) + Thread(target=stop).start() class Synthesizer: @@ -187,8 +184,12 @@ def __init__( self.config = config def close(self): - self.orca_connection.send({'command': Commands.CLOSE}) - self.orca_process.join() + try: + self.orca_connection.send({'command': Commands.CLOSE}) + self.orca_process.join(1.0) + except Exception as e: + sys.stderr.write(str(e)) + self.orca_process.kill() def start(self, utterance_end_sec): self.speaker.start() @@ -201,10 +202,11 @@ def flush(self): self.orca_connection.send({'command': Commands.FLUSH}) def interrupt(self): - self.orca_connection.send({'command': Commands.INTERRUPT}) - while self.orca_connection.poll() and self.orca_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.1) - self.speaker.interrupt() + try: + self.orca_connection.send({'command': Commands.INTERRUPT}) + self.speaker.interrupt() + except Exception as e: + sys.stderr.write(str(e)) def tick(self): while self.orca_connection.poll(): @@ -233,7 +235,7 @@ def handler(_, __) -> None: signal.signal(signal.SIGINT, handler) orca = pvorca.create(access_key=config['access_key']) - orca_stream = orca.stream_open() + orca_stream = orca.stream_open(speech_rate=config['orca_speech_rate']) connection.send(orca.sample_rate) connection.send({'version': orca.version}) @@ -247,11 +249,15 @@ def handler(_, 
__) -> None: flushing = False text_queue = Queue() while not close: + time.sleep(0.1) while connection.poll(): - time.sleep(0.1) message = connection.recv() if message['command'] == Commands.CLOSE: close = True + synthesizing = False + flushing = False + while not text_queue.empty(): + text_queue.get() elif message['command'] == Commands.START: synthesizing = True utterance_end_sec = message['utterance_end_sec'] @@ -266,19 +272,19 @@ def handler(_, __) -> None: while not text_queue.empty(): text_queue.get() orca_stream.flush() - connection.send({'command': Commands.INTERRUPT}) orca_profiler.reset() utterance_end_sec = 0 delay_sec = -1 while not text_queue.empty(): text = text_queue.get() - orca_profiler.tick() - pcm = orca_stream.synthesize(text) - orca_profiler.tock(pcm) - if pcm is not None: - connection.send({'command': Commands.SPEAK, 'pcm': pcm}) - if delay_sec == -1: - delay_sec = time.perf_counter() - utterance_end_sec + if synthesizing: + orca_profiler.tick() + pcm = orca_stream.synthesize(text) + orca_profiler.tock(pcm) + if pcm is not None: + connection.send({'command': Commands.SPEAK, 'pcm': pcm}) + if delay_sec == -1: + delay_sec = time.perf_counter() - utterance_end_sec if synthesizing and flushing and text_queue.empty(): synthesizing = False flushing = False @@ -309,8 +315,12 @@ def __init__( self.config = config def close(self): - self.pllm_connection.send({'command': Commands.CLOSE}) - self.pllm_process.join() + try: + self.pllm_connection.send({'command': Commands.CLOSE}) + self.pllm_process.join(1.0) + except Exception as e: + sys.stderr.write(str(e)) + self.pllm_process.kill() def process(self, text: str, utterance_end_sec): ppn_prompt = self.config['ppn_prompt'] @@ -321,9 +331,6 @@ def process(self, text: str, utterance_end_sec): def interrupt(self): self.pllm_connection.send({'command': Commands.INTERRUPT}) - while self.pllm_connection.poll() and self.pllm_connection.recv()['command'] != Commands.INTERRUPT: - time.sleep(0.1) - print('', flush=True) self.synthesizer.interrupt() def tick(self): @@ -363,7 +370,6 @@ def handler(_, __) -> None: dialog = pllm.get_dialog(system=config['picollm_system_prompt']) else: dialog = pllm.get_dialog() - generating = False pllm_profiler = TPSProfiler() @@ -378,66 +384,53 @@ def handler(_, __) -> None: def llm_callback(text): pllm_profiler.tock() - if generating: - completion.append(text) - new_tokens = completion.get_new_tokens() - if len(new_tokens) > 0: - connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens}) - - def llm_task(text): - short_answers_instruction = \ - "You are a voice assistant and your answers are very short but informative" - dialog.add_human_request( - f"{short_answers_instruction}. 
{text}" if config['short_answers'] else text) - - completion.reset() - return pllm.generate( - prompt=dialog.prompt(), - completion_token_limit=config['picollm_completion_token_limit'], - stop_phrases=stop_phrases, - presence_penalty=config['picollm_presence_penalty'], - frequency_penalty=config['picollm_frequency_penalty'], - temperature=config['picollm_temperature'], - top_p=config['picollm_top_p'], - stream_callback=llm_callback) + completion.append(text) + new_tokens = completion.get_new_tokens() + if len(new_tokens) > 0: + connection.send({'command': Commands.SYNTHESIZE, 'text': new_tokens}) + + close = [False] + prompt = [None] + + def event_manager(): + while not close[0]: + message = connection.recv() + if message['command'] == Commands.CLOSE: + close[0] = True + pllm.interrupt() + return + elif message['command'] == Commands.INTERRUPT: + pllm.interrupt() + elif message['command'] == Commands.PROCESS: + prompt[0] = message['text'] + Thread(target=event_manager).start() try: - close = False - executor = ThreadPoolExecutor() - llm_future = None - interrupting = False - while not close: - time.sleep(0.1) - while connection.poll(): - message = connection.recv() - if message['command'] == Commands.CLOSE: - close = True - elif message['command'] == Commands.PROCESS: - generating = True - text = message['text'] - pllm_profiler.reset() - llm_future = executor.submit(llm_task, text) - elif message['command'] == Commands.INTERRUPT: - interrupting = True - generating = False - pllm.interrupt() - if llm_future and llm_future.done(): - generating = False - llm_result = llm_future.result() - dialog.add_llm_response(llm_result.completion) - if llm_result.endpoint == picollm.PicoLLMEndpoints.INTERRUPTED: - interrupting = False - connection.send({'command': Commands.INTERRUPT}) - else: + while not close[0]: + if prompt[0] is not None: + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" + dialog.add_human_request( + f"{short_answers_instruction}. 
{prompt[0]}" if config['short_answers'] else prompt[0]) + prompt[0] = None + + completion.reset() + result = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=config['picollm_completion_token_limit'], + stop_phrases=stop_phrases, + presence_penalty=config['picollm_presence_penalty'], + frequency_penalty=config['picollm_frequency_penalty'], + temperature=config['picollm_temperature'], + top_p=config['picollm_top_p'], + stream_callback=llm_callback) + + dialog.add_llm_response(result.completion) + if result.endpoint != picollm.PicoLLMEndpoints.INTERRUPTED: connection.send({'command': Commands.FLUSH, 'profile': pllm_profiler.tps()}) - llm_future = None - if not llm_future and interrupting: - interrupting = False - connection.send({'command': Commands.INTERRUPT}) + else: + time.sleep(0.25) finally: - while llm_future and llm_future.done(): - time.sleep(0.1) - del executor pllm.release() @@ -578,15 +571,14 @@ def handler(_, __) -> None: try: while not stop[0]: + if not pllm_process.is_alive() or not orca_process.is_alive(): + break + recorder.tick() generator.tick() synthesizer.tick() speaker.tick() finally: - generator.interrupt() - generator.tick() - synthesizer.tick() - speaker.tick() recorder.close() listener.close() generator.close() @@ -594,7 +586,7 @@ def handler(_, __) -> None: speaker.close() for child in active_children(): - child.terminate() + child.kill() porcupine.delete() cheetah.delete() @@ -614,8 +606,8 @@ def handler(_, __) -> None: '--picollm_model_path', help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( - '--keyword-model_path', - help='Absolute path to the keyword model file (`.ppn`). If not set, `Picovoice` will be the wake phrase') + '--keyword_model_path', + help='Absolute path to the keyword model file (`.ppn`). If not set, `Jarvis` will be the wake phrase') parser.add_argument( '--cheetah_endpoint_duration_sec', type=float, @@ -666,6 +658,10 @@ def handler(_, __) -> None: type=float, help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower " "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") + parser.add_argument( + '--orca_speech_rate', + type=float, + help="Rate of speech of the generated audio.") parser.add_argument( '--porcupine_sensitivity', type=float, @@ -704,6 +700,7 @@ def handler(_, __) -> None: 'picollm_top_p': 1, 'picollm_system_prompt': None, 'orca_warmup_sec': 0, + 'orca_speech_rate': 1.0, 'porcupine_sensitivity': 0.5, 'short_answers': False, 'profile': False
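
For readers adapting these patches elsewhere: the VT100 rendering that patch 3 swaps in for curses reduces to two escape-code primitives plus buffered writes. A minimal standalone sketch of the same conventions follows (it assumes only a VT100-capable console such as Windows Terminal; the helpers mirror Window.goto and Window.color from patch 3):

    import os
    import sys

    def goto(y, x):
        # CSI cursor-position sequence; VT100 rows and columns are 1-based
        return f"\u001B[{y + 1};{x + 1}H"

    def color(args):
        # CSI SGR sequence: [0] resets, [90] is bright black, [38, 2, R, G, B] sets a 24-bit foreground
        return f"\u001B[{';'.join(str(a) for a in args)}m"

    os.system('cls' if os.name == 'nt' else 'clear')  # reset the screen, as Window.reset() does
    sys.stdout.write(goto(2, 4))                      # park the cursor at row 2, column 4 (0-based, as in Window)
    sys.stdout.write(color([38, 2, 55, 255, 125]) + '▄' * 10 + color([0]))
    sys.stdout.flush()                                # batch writes, then present once per frame, as Window.present() does

Because plain stdout escape sequences need no extra dependency, patch 4 can drop windows-curses from requirements.txt and keep only psutil.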