From 669b0c56b001c1d48f1716af1769a65e6c2d18d2 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Sat, 11 May 2024 17:02:15 -0700 Subject: [PATCH 01/19] voice --- recipes/voice-llm/python/main.py | 375 ++++++++++++++++++++++ recipes/voice-llm/python/requirements.txt | 6 + 2 files changed, 381 insertions(+) create mode 100644 recipes/voice-llm/python/main.py create mode 100644 recipes/voice-llm/python/requirements.txt diff --git a/recipes/voice-llm/python/main.py b/recipes/voice-llm/python/main.py new file mode 100644 index 0000000..8bdc08f --- /dev/null +++ b/recipes/voice-llm/python/main.py @@ -0,0 +1,375 @@ +import signal +import time +from argparse import ArgumentParser +from enum import Enum +from multiprocessing import ( + Pipe, + Process, +) +from typing import ( + Optional, + Sequence, +) + +import picollm +import pvcheetah +import pvorca +import pvporcupine +from pvrecorder import PvRecorder + + +class Logger: + class Levels(Enum): + DEBUG = 'DEBUG' + INFO = 'INFO' + + def __init__(self, level: 'Logger.Levels' = Levels.INFO) -> None: + self._level = level + + def debug(self, message: str, end: str = '\n') -> None: + if self._level is self.Levels.DEBUG: + print(message, end=end, flush=True) + + # noinspection PyMethodMayBeStatic + def info(self, message: str, end: str = '\n') -> None: + print(message, end=end, flush=True) + + +class RTFProfiler: + def __init__(self, sample_rate: int) -> None: + self._sample_rate = sample_rate + self._compute_sec = 0. + self._audio_sec = 0. + self._tick_sec = 0. + + def tick(self) -> None: + self._tick_sec = time.time() + + def tock(self, audio: Optional[Sequence[int]] = None) -> None: + self._compute_sec += time.time() - self._tick_sec + self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0. + + def rtf(self) -> float: + rtf = self._compute_sec / self._audio_sec + self._compute_sec = 0. + self._audio_sec = 0. + return rtf + + +class TPSProfiler(object): + def __init__(self) -> None: + self._num_tokens = 0 + self._start_sec = 0. + + def tock(self) -> None: + if self._start_sec == 0.: + self._start_sec = time.time() + else: + self._num_tokens += 1 + + def tps(self) -> float: + tps = self._num_tokens / (time.time() - self._start_sec) + self._num_tokens = 0 + self._start_sec = 0. + return tps + + +def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec: int = 0.03) -> None: + # noinspection PyUnresolvedReferences + import numpy as np + from sounddevice import OutputStream + + orca = pvorca.create(access_key=access_key) + orca_stream = orca.stream_open() + + texts = list() + pcm_buffer = list() + warmup = [False] + synthesize = False + flush = False + close = False + utterance_end_sec = 0. + delay_sec = [-1.] 
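An aside on the two profilers defined in this first patch: both are driven in `tick()`/`tock()` pairs, and each read-out (`rtf()`, `tps()`) resets the accumulators. Below is a minimal, self-contained usage sketch; the 16 kHz sample rate, the silent frame, and the synthetic token stream are illustrative assumptions, not part of the recipe.

```python
import time

from main import RTFProfiler, TPSProfiler  # assumes main.py and its dependencies are importable

# Real-time factor: compute seconds spent per second of audio processed.
rtf_profiler = RTFProfiler(sample_rate=16000)
frame = [0] * 512                        # one synthetic frame of silence
rtf_profiler.tick()                      # start the compute timer
_ = sum(frame)                           # stand-in for e.g. porcupine.process(frame)
rtf_profiler.tock(frame)                 # stop the timer and credit 512 / 16000 sec of audio
print(f"RTF: {rtf_profiler.rtf():.4f}")  # reading the RTF resets both accumulators

# Tokens per second: the first tock() starts the clock; each later tock() counts a token.
tps_profiler = TPSProfiler()
for _ in range(33):                      # first call starts the clock, then 32 synthetic tokens
    time.sleep(0.01)
    tps_profiler.tock()
print(f"TPS: {tps_profiler.tps():.1f}")  # roughly 100 tokens per second for this stream
```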
+ + def callback(data, _, __, ___) -> None: + if warmup[0]: + if len(pcm_buffer) < int(warmup_sec * orca.sample_rate): + data[:, 0] = 0 + return + else: + warmup[0] = False + + if len(pcm_buffer) < data.shape[0]: + pcm_buffer.extend([0] * (data.shape[0] - len(pcm_buffer))) + + data[:, 0] = pcm_buffer[:data.shape[0]] + del pcm_buffer[:data.shape[0]] + + stream = OutputStream( + samplerate=orca.sample_rate, + blocksize=int(stream_frame_sec * orca.sample_rate), + channels=1, + dtype='int16', + callback=callback) + + connection.send({'version': orca.version}) + + orca_profiler = RTFProfiler(orca.sample_rate) + + def buffer_pcm(x: Optional[Sequence[int]]) -> None: + if x is not None: + pcm_buffer.extend(x) + if delay_sec[0] == -1: + delay_sec[0] = time.time() - utterance_end_sec + + while True: + if synthesize and len(texts) > 0: + orca_profiler.tick() + pcm = orca_stream.synthesize(texts.pop(0)) + orca_profiler.tock(pcm) + buffer_pcm(pcm) + elif flush: + while len(texts) > 0: + orca_profiler.tick() + pcm = orca_stream.synthesize(texts.pop(0)) + orca_profiler.tock(pcm) + buffer_pcm(pcm) + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + buffer_pcm(pcm) + connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) + flush = False + while len(pcm_buffer) > 0: + time.sleep(stream_frame_sec) + stream.stop() + delay_sec[0] = -1 + connection.send({'done': True}) + elif close: + break + else: + time.sleep(stream_frame_sec) + + while connection.poll(): + message = connection.recv() + if message['command'] == 'synthesize': + texts.append(message['text']) + if not stream.active: + stream.start() + warmup[0] = True + utterance_end_sec = message['utterance_end_sec'] + synthesize = True + elif message['command'] == 'flush': + synthesize = False + flush = True + elif message['command'] == 'close': + close = True + + stream.close() + orca_stream.close() + orca.delete() + + +def main() -> None: + parser = ArgumentParser() + parser.add_argument( + '--access_key', + required=True, + help='`AccessKey` obtained from `Picovoice Console` (https://console.picovoice.ai/).') + parser.add_argument( + '--picollm_model_path', + required=True, + help='Absolute path to the file containing LLM parameters.') + parser.add_argument( + '--keyword-model_path', + help='Absolute path to the keyword model file. If not set, `Picovoice` will be used as the wake phrase') + parser.add_argument( + '--cheetah_endpoint_duration_sec', + type=float, + default=1., + help="Duration of silence (pause) after the user's utterance to consider it the end of the utterance.") + parser.add_argument( + '--picollm_device', + help="String representation of the device (e.g., CPU or GPU) to use for inference. If set to `best`, picoLLM " + "picks the most suitable device. If set to `gpu`, the engine uses the first available GPU device. To " + "select a specific GPU device, set this argument to `gpu:${GPU_INDEX}`, where `${GPU_INDEX}` is the index " + "of the target GPU. If set to `cpu`, the engine will run on the CPU with the default number of threads. " + "To specify the number of threads, set this argument to `cpu:${NUM_THREADS}`, where `${NUM_THREADS}` is " + "the desired number of threads.") + parser.add_argument( + '--picollm_completion_token_limit', + type=int, + help="Maximum number of tokens in the completion. 
Set to `None` to impose no limit.") + parser.add_argument( + '--picollm_presence_penalty', + type=float, + default=0., + help="It penalizes logits already appearing in the partial completion if set to a positive value. If set to " + "`0.0`, it has no effect.") + parser.add_argument( + '--picollm_frequency_penalty', + type=float, + default=0., + help="If set to a positive floating-point value, it penalizes logits proportional to the frequency of their " + "appearance in the partial completion. If set to `0.0`, it has no effect.") + parser.add_argument( + '--picollm_temperature', + type=float, + default=0., + help="Sampling temperature. Temperature is a non-negative floating-point value that controls the randomness of " + "the sampler. A higher temperature smoothens the samplers' output, increasing the randomness. In " + "contrast, a lower temperature creates a narrower distribution and reduces variability. Setting it to " + "`0` selects the maximum logit during sampling.") + parser.add_argument( + '--picollm_top_p', + type=float, + default=1., + help="A positive floating-point number within (0, 1]. It restricts the sampler's choices to high-probability " + "logits that form the `top_p` portion of the probability mass. Hence, it avoids randomly selecting " + "unlikely logits. A value of `1.` enables the sampler to pick any token with non-zero probability, " + "turning off the feature.") + parser.add_argument( + '--orca_warmup_sec', + type=float, + default=0., + help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower " + "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") + parser.add_argument( + '--log_level', + choices=[x.value for x in Logger.Levels], + default=Logger.Levels.INFO.value, + help='Log level verbosity.') + args = parser.parse_args() + + access_key = args.access_key + picollm_model_path = args.picollm_model_path + keyword_model_path = args.keyword_model_path + cheetah_endpoint_duration_sec = args.cheetah_endpoint_duration_sec + picollm_device = args.picollm_device + picollm_completion_token_limit = args.picollm_completion_token_limit + picollm_presence_penalty = args.picollm_presence_penalty + picollm_frequency_penalty = args.picollm_frequency_penalty + picollm_temperature = args.picollm_temperature + picollm_top_p = args.picollm_top_p + orca_warmup_sec = args.orca_warmup_sec + log_level = Logger.Levels(args.log_level) + + log = Logger(log_level) + + if keyword_model_path is None: + porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) + else: + porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) + log.info(f"→ Porcupine V{porcupine.version}") + + cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) + log.info(f"→ Cheetah V{cheetah.version}") + + pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) + dialog = pllm.get_dialog() + log.info(f"→ picoLLM V{pllm.version} {pllm.model}") + + main_connection, orca_process_connection = Pipe() + orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) + orca_process.start() + while not main_connection.poll(): + time.sleep(0.01) + log.info(f"→ Orca V{main_connection.recv()['version']}") + + mic = PvRecorder(frame_length=porcupine.frame_length) + mic.start() + + log.info("\n$ Say `Picovoice` ...") + + stop = [False] + + def handler(_, __) -> None: + 
stop[0] = True + + signal.signal(signal.SIGINT, handler) + + wake_word_detected = False + human_request = '' + endpoint_reached = False + utterance_end_sec = 0 + + porcupine_profiler = RTFProfiler(porcupine.sample_rate) + cheetah_profiler = RTFProfiler(cheetah.sample_rate) + + try: + while True: + if stop[0]: + break + elif not wake_word_detected: + pcm = mic.read() + porcupine_profiler.tick() + wake_word_detected = porcupine.process(pcm) == 0 + porcupine_profiler.tock(pcm) + if wake_word_detected: + log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") + log.info("$ Wake word detected, utter your request or question ...\n") + log.info("human > ", end='') + elif not endpoint_reached: + pcm = mic.read() + cheetah_profiler.tick() + partial_transcript, endpoint_reached = cheetah.process(pcm) + cheetah_profiler.tock(pcm) + log.info(partial_transcript, end='') + human_request += partial_transcript + if endpoint_reached: + utterance_end_sec = time.time() + cheetah_profiler.tick() + remaining_transcript = cheetah.flush() + cheetah_profiler.tock() + human_request += remaining_transcript + log.info(remaining_transcript, end='\n\n') + log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") + else: + dialog.add_human_request(human_request) + + picollm_profiler = TPSProfiler() + + def llm_callback(text: str) -> None: + picollm_profiler.tock() + main_connection.send( + {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) + log.info(text, end='') + + log.info("\nllm > ", end='') + res = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=picollm_completion_token_limit, + presence_penalty=picollm_presence_penalty, + frequency_penalty=picollm_frequency_penalty, + temperature=picollm_temperature, + top_p=picollm_top_p, + stream_callback=llm_callback) + main_connection.send({'command': 'flush'}) + log.info('\n') + dialog.add_llm_response(res.completion) + log.debug(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") + + while not main_connection.poll(): + time.sleep(0.01) + message = main_connection.recv() + log.debug(f"[Orca RTF: {message['rtf']:.2f}]") + log.debug(f"[Delay: {message['delay']:.2f} sec]") + while not main_connection.poll(): + time.sleep(0.01) + assert main_connection.recv()['done'] + + wake_word_detected = False + human_request = '' + endpoint_reached = False + log.info("\n$ Say `Picovoice` ...") + finally: + main_connection.send({'command': 'close'}) + mic.delete() + pllm.release() + cheetah.delete() + porcupine.delete() + orca_process.join() + + +if __name__ == '__main__': + main() diff --git a/recipes/voice-llm/python/requirements.txt b/recipes/voice-llm/python/requirements.txt new file mode 100644 index 0000000..4657101 --- /dev/null +++ b/recipes/voice-llm/python/requirements.txt @@ -0,0 +1,6 @@ +numpy +pvcheetah==2.0.1 +pvorca==0.2.1 +pvporcupine==3.0.2 +pvrecorder==1.2.2 +sounddevice \ No newline at end of file From ca6bdf0ff09ded06b0678360e502b0bf97bd0995 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 14:29:06 -0700 Subject: [PATCH 02/19] fix --- res/.lint/spell-check/dict.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index f400b3f..c2d9a00 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,2 +1,14 @@ +dtype +logit +numpy pico -picovoice \ No newline at end of file +picollm +picovoice +pllm +pvcheetah +pvorca +pvporcupine +pvrecorder +samplerate +sounddevice +tock \ No 
newline at end of file

From e407cd2852b87e1d41191edd72b7ea1be72f10f4 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:29:52 -0700
Subject: [PATCH 03/19] clean

---
 recipes/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 recipes/.gitkeep

diff --git a/recipes/.gitkeep b/recipes/.gitkeep
deleted file mode 100644
index e69de29..0000000

From 8ab42a3236fb520fb081b8a90d6978137f25dcfe Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:34:17 -0700
Subject: [PATCH 04/19] readme

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a8c2de5..149db16 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,6 @@
-# pico-cookbook
\ No newline at end of file
+# Pico Cookbook
+
+Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
+
+[![Twitter URL](https://img.shields.io/twitter/url?label=%40AiPicovoice&style=social&url=https%3A%2F%2Ftwitter.com%2FAiPicovoice)](https://twitter.com/AiPicovoice)
+[![YouTube Channel Views](https://img.shields.io/youtube/channel/views/UCAdi9sTCXLosG1XeqDwLx7w?label=YouTube&style=social)](https://www.youtube.com/channel/UCAdi9sTCXLosG1XeqDwLx7w)

From e3d1c0be8dd2fec39115b2e7b684aeb750f46635 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:43:18 -0700
Subject: [PATCH 05/19] doc

---
 recipes/voice-llm/README.md        | 6 ++++++
 recipes/voice-llm/python/README.md | 4 ++++
 2 files changed, 10 insertions(+)
 create mode 100644 recipes/voice-llm/README.md
 create mode 100644 recipes/voice-llm/python/README.md

diff --git a/recipes/voice-llm/README.md b/recipes/voice-llm/README.md
new file mode 100644
index 0000000..cffba04
--- /dev/null
+++ b/recipes/voice-llm/README.md
@@ -0,0 +1,6 @@
+## Components
+
+- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/)
+- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/)
+- [picoLLM]()
+- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/)
\ No newline at end of file
diff --git a/recipes/voice-llm/python/README.md b/recipes/voice-llm/python/README.md
new file mode 100644
index 0000000..e5a696e
--- /dev/null
+++ b/recipes/voice-llm/python/README.md
@@ -0,0 +1,4 @@
+## Compatibility
+
+- Python 3.8+
+- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3).

From 0192975e27345c80eb3e7e9f351ff86aa9a1a7ba Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:55:49 -0700
Subject: [PATCH 06/19] rename

---
 recipes/llm-voice-assistant/README.md | 14 ++++++++++++++
 .../python/README.md                  |  0
 .../python/main.py                    |  0
 .../python/requirements.txt           |  0
 recipes/voice-llm/README.md           |  6 ------
 5 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 recipes/llm-voice-assistant/README.md
 rename recipes/{voice-llm => llm-voice-assistant}/python/README.md (100%)
 rename recipes/{voice-llm => llm-voice-assistant}/python/main.py (100%)
 rename recipes/{voice-llm => llm-voice-assistant}/python/requirements.txt (100%)
 delete mode 100644 recipes/voice-llm/README.md

diff --git a/recipes/llm-voice-assistant/README.md b/recipes/llm-voice-assistant/README.md
new file mode 100644
index 0000000..ef7c508
--- /dev/null
+++ b/recipes/llm-voice-assistant/README.md
@@ -0,0 +1,14 @@
+# LLM-Powered Voice Assistant
+
+Hands-free voice assistant powered by a large language model (LLM); all voice recognition, LLM inference, and speech synthesis run on-device.
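As an aside before the component list below: the recipe chains those four engines into one streaming loop, wake word to speech-to-text to LLM to text-to-speech. A pseudocode-level sketch of that flow, with `${ACCESS_KEY}` and `${PICOLLM_MODEL_PATH}` as placeholders as in the usage instructions; note the real `main.py` streams the reply through `orca.stream_open()` in a separate process rather than synthesizing it in one shot:

```python
import picollm
import pvcheetah
import pvorca
import pvporcupine
from pvrecorder import PvRecorder

access_key = '${ACCESS_KEY}'  # placeholder: AccessKey from Picovoice Console

porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
cheetah = pvcheetah.create(access_key=access_key)
pllm = picollm.create(access_key=access_key, model_path='${PICOLLM_MODEL_PATH}')
orca = pvorca.create(access_key=access_key)
dialog = pllm.get_dialog()

mic = PvRecorder(frame_length=porcupine.frame_length)  # Porcupine and Cheetah share 512-sample frames
mic.start()

while porcupine.process(mic.read()) == -1:  # 1. block until the wake word is heard
    pass

request, endpoint_reached = '', False
while not endpoint_reached:                 # 2. transcribe until the user pauses
    partial_transcript, endpoint_reached = cheetah.process(mic.read())
    request += partial_transcript
request += cheetah.flush()

dialog.add_human_request(request)           # 3. generate the reply with the LLM
res = pllm.generate(prompt=dialog.prompt())
dialog.add_llm_response(res.completion)

pcm = orca.synthesize(res.completion)       # 4. speak (return shape varies across pvorca versions)
# cleanup omitted: mic.delete(), pllm.release(), cheetah.delete(), porcupine.delete(), orca.delete()
```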
+ +## Components + +- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/) +- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/) +- [picoLLM Inference Engine]() +- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/) + +## Implementations + +- [Python](python) diff --git a/recipes/voice-llm/python/README.md b/recipes/llm-voice-assistant/python/README.md similarity index 100% rename from recipes/voice-llm/python/README.md rename to recipes/llm-voice-assistant/python/README.md diff --git a/recipes/voice-llm/python/main.py b/recipes/llm-voice-assistant/python/main.py similarity index 100% rename from recipes/voice-llm/python/main.py rename to recipes/llm-voice-assistant/python/main.py diff --git a/recipes/voice-llm/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt similarity index 100% rename from recipes/voice-llm/python/requirements.txt rename to recipes/llm-voice-assistant/python/requirements.txt diff --git a/recipes/voice-llm/README.md b/recipes/voice-llm/README.md deleted file mode 100644 index cffba04..0000000 --- a/recipes/voice-llm/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## Components - -- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/) -- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/) -- [picoLLM]() -- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/) \ No newline at end of file From bef778979bde300e3c5cd8ce418a5e1ecd4f3ac9 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 15:58:50 -0700 Subject: [PATCH 07/19] wip --- recipes/llm-voice-assistant/python/README.md | 27 ++++++++++++++++++++ recipes/llm-voice-assistant/python/main.py | 20 +++++++-------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index e5a696e..61d4652 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -2,3 +2,30 @@ - Python 3.8+ - Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3). + +## AccessKey + +## picoLLM Model + +## Custom Wake Word (Optional) + +## Usage + +```console +pip install -r requirements.txt +``` + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} +``` + +```console +python main.py --help +``` + +## Profiling + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile +``` + diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 8bdc08f..ec17742 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -179,10 +179,10 @@ def main() -> None: parser.add_argument( '--picollm_model_path', required=True, - help='Absolute path to the file containing LLM parameters.') + help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( '--keyword-model_path', - help='Absolute path to the keyword model file. If not set, `Picovoice` will be used as the wake phrase') + help='Absolute path to the keyword model file (`.ppn`). 
If not set, `Picovoice` will be the wake phrase') parser.add_argument( '--cheetah_endpoint_duration_sec', type=float, @@ -267,7 +267,7 @@ def main() -> None: pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) dialog = pllm.get_dialog() - log.info(f"→ picoLLM V{pllm.version} {pllm.model}") + log.info(f"→ picoLLM V{pllm.version} <{pllm.model}>") main_connection, orca_process_connection = Pipe() orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) @@ -289,7 +289,7 @@ def handler(_, __) -> None: signal.signal(signal.SIGINT, handler) wake_word_detected = False - human_request = '' + user_request = '' endpoint_reached = False utterance_end_sec = 0 @@ -308,24 +308,24 @@ def handler(_, __) -> None: if wake_word_detected: log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") log.info("$ Wake word detected, utter your request or question ...\n") - log.info("human > ", end='') + log.info("User > ", end='') elif not endpoint_reached: pcm = mic.read() cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) log.info(partial_transcript, end='') - human_request += partial_transcript + user_request += partial_transcript if endpoint_reached: utterance_end_sec = time.time() cheetah_profiler.tick() remaining_transcript = cheetah.flush() cheetah_profiler.tock() - human_request += remaining_transcript + user_request += remaining_transcript log.info(remaining_transcript, end='\n\n') log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: - dialog.add_human_request(human_request) + dialog.add_human_request(user_request) picollm_profiler = TPSProfiler() @@ -335,7 +335,7 @@ def llm_callback(text: str) -> None: {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) log.info(text, end='') - log.info("\nllm > ", end='') + log.info("\nLLM > ", end='') res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, @@ -359,7 +359,7 @@ def llm_callback(text: str) -> None: assert main_connection.recv()['done'] wake_word_detected = False - human_request = '' + user_request = '' endpoint_reached = False log.info("\n$ Say `Picovoice` ...") finally: From cb5ff8042bf27c12803998941955901d8e48ffb1 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:06:41 -0700 Subject: [PATCH 08/19] wip --- recipes/llm-voice-assistant/python/README.md | 6 ++ recipes/llm-voice-assistant/python/main.py | 68 +++++++------------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index 61d4652..76bac47 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -25,6 +25,12 @@ python main.py --help ## Profiling +### Realtime Factor + +### Token per Second + +### Latency + ```console python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile ``` diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ec17742..bc5d446 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -1,7 +1,6 @@ import signal import time from argparse import ArgumentParser -from enum import Enum from multiprocessing import ( Pipe, Process, @@ -18,23 +17,6 @@ from pvrecorder import PvRecorder -class Logger: - class Levels(Enum): - DEBUG 
= 'DEBUG' - INFO = 'INFO' - - def __init__(self, level: 'Logger.Levels' = Levels.INFO) -> None: - self._level = level - - def debug(self, message: str, end: str = '\n') -> None: - if self._level is self.Levels.DEBUG: - print(message, end=end, flush=True) - - # noinspection PyMethodMayBeStatic - def info(self, message: str, end: str = '\n') -> None: - print(message, end=end, flush=True) - - class RTFProfiler: def __init__(self, sample_rate: int) -> None: self._sample_rate = sample_rate @@ -234,11 +216,7 @@ def main() -> None: default=0., help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower " "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") - parser.add_argument( - '--log_level', - choices=[x.value for x in Logger.Levels], - default=Logger.Levels.INFO.value, - help='Log level verbosity.') + parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.') args = parser.parse_args() access_key = args.access_key @@ -252,34 +230,32 @@ def main() -> None: picollm_temperature = args.picollm_temperature picollm_top_p = args.picollm_top_p orca_warmup_sec = args.orca_warmup_sec - log_level = Logger.Levels(args.log_level) - - log = Logger(log_level) + profile = args.profile if keyword_model_path is None: porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) else: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - log.info(f"→ Porcupine V{porcupine.version}") + print(f"→ Porcupine V{porcupine.version}") cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) - log.info(f"→ Cheetah V{cheetah.version}") + print(f"→ Cheetah V{cheetah.version}") pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) dialog = pllm.get_dialog() - log.info(f"→ picoLLM V{pllm.version} <{pllm.model}>") + print(f"→ picoLLM V{pllm.version} <{pllm.model}>") main_connection, orca_process_connection = Pipe() orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) orca_process.start() while not main_connection.poll(): time.sleep(0.01) - log.info(f"→ Orca V{main_connection.recv()['version']}") + print(f"→ Orca V{main_connection.recv()['version']}") mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - log.info("\n$ Say `Picovoice` ...") + print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") stop = [False] @@ -306,15 +282,16 @@ def handler(_, __) -> None: wake_word_detected = porcupine.process(pcm) == 0 porcupine_profiler.tock(pcm) if wake_word_detected: - log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") - log.info("$ Wake word detected, utter your request or question ...\n") - log.info("User > ", end='') + if profile: + print(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") + print("$ Wake word detected, utter your request or question ...\n") + print("User > ", end='', flush=True) elif not endpoint_reached: pcm = mic.read() cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) - log.info(partial_transcript, end='') + print(partial_transcript, end='', flush=True) user_request += partial_transcript if endpoint_reached: utterance_end_sec = time.time() @@ -322,8 +299,9 @@ def handler(_, __) -> None: remaining_transcript = cheetah.flush() cheetah_profiler.tock() user_request += 
remaining_transcript - log.info(remaining_transcript, end='\n\n') - log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") + print(remaining_transcript, end='\n\n') + if profile: + print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: dialog.add_human_request(user_request) @@ -333,9 +311,9 @@ def llm_callback(text: str) -> None: picollm_profiler.tock() main_connection.send( {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) - log.info(text, end='') + print(text, end='', flush=True) - log.info("\nLLM > ", end='') + print("\nLLM > ", end='', flush=True) res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, @@ -345,15 +323,17 @@ def llm_callback(text: str) -> None: top_p=picollm_top_p, stream_callback=llm_callback) main_connection.send({'command': 'flush'}) - log.info('\n') + print('\n') dialog.add_llm_response(res.completion) - log.debug(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") + if profile: + print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") while not main_connection.poll(): time.sleep(0.01) message = main_connection.recv() - log.debug(f"[Orca RTF: {message['rtf']:.2f}]") - log.debug(f"[Delay: {message['delay']:.2f} sec]") + if profile: + print(f"[Orca RTF: {message['rtf']:.2f}]") + print(f"[Delay: {message['delay']:.2f} sec]") while not main_connection.poll(): time.sleep(0.01) assert main_connection.recv()['done'] @@ -361,7 +341,7 @@ def llm_callback(text: str) -> None: wake_word_detected = False user_request = '' endpoint_reached = False - log.info("\n$ Say `Picovoice` ...") + print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") finally: main_connection.send({'command': 'close'}) mic.delete() From 395801015e508466afea69936362e859c8d226a1 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:11:52 -0700 Subject: [PATCH 09/19] minor --- recipes/llm-voice-assistant/python/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index bc5d446..fb8803a 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -255,7 +255,7 @@ def main() -> None: mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") + print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") stop = [False] @@ -341,7 +341,7 @@ def llm_callback(text: str) -> None: wake_word_detected = False user_request = '' endpoint_reached = False - print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") + print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") finally: main_connection.send({'command': 'close'}) mic.delete() From 2b8a4c956a924cd9cbfdc3d7889bde1e44fb969a Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:37:59 -0700 Subject: [PATCH 10/19] wip --- recipes/llm-voice-assistant/python/README.md | 47 +++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index 76bac47..8863ec4 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -5,33 +5,70 @@ ## AccessKey +AccessKey is your authentication and authorization token for deploying Picovoice 
SDKs, including picoLLM. Anyone who is
+using Picovoice needs a valid AccessKey. You must keep your AccessKey secret. You need internet connectivity to
+validate your AccessKey with Picovoice license servers, even though the LLM inference runs 100% offline and is
+completely free for open-weight models. Everyone who signs up for
+[Picovoice Console](https://console.picovoice.ai/) receives a unique AccessKey.
+
 ## picoLLM Model
 
+picoLLM Inference Engine supports many open-weight models. The models are available on
+[Picovoice Console](https://console.picovoice.ai/).
+
-## Custom Wake Word (Optional)
-
 ## Usage
 
+Install the required packages:
+
 ```console
 pip install -r requirements.txt
 ```
 
+Run the demo:
+
 ```console
 python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH}
 ```
 
+Replace `${ACCESS_KEY}` with your AccessKey obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path
+to the model downloaded from Picovoice Console.
+
+To see all available options, type the following:
+
 ```console
 python main.py --help
 ```
 
+## Custom Wake Word (Optional)
+
+The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
+application using the `--keyword_model_path` argument.
+
 ## Profiling
 
-### Realtime Factor
-
-### Token per Second
-
-### Latency
+To see the runtime profiling metrics, run the demo with the `--profile` argument:
 
 ```console
 python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile
 ```
 
+Replace `${ACCESS_KEY}` with your AccessKey obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path
+to the model downloaded from Picovoice Console.
+
+The demo profiles three metrics: Real-time Factor (RTF), Token per Second (TPS), and Latency.
+
+### Real-time Factor (RTF)
+
+RTF is a standard metric for measuring the speed of speech processing (e.g., wake word, speech-to-text, and
+text-to-speech). RTF is the CPU time divided by the processed (recognized or synthesized) audio length. Hence, a
+lower RTF means a more efficient engine.
+
+### Token per Second (TPS)
+
+Token per second is the standard metric for measuring the speed of LLM inference engines. TPS is the number of
+generated tokens divided by the compute time used to create them. A higher TPS is better.
+
+### Latency
+
+We measure the latency as the delay between the end of the user's utterance (i.e., the time when the user finishes
+talking) and the time that the voice assistant generates the first chunk of the audio response (i.e., when the user
+starts hearing the response).
+

From bffe1a4665a16cba695c46e0a8088500b0779626 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 16:38:42 -0700
Subject: [PATCH 11/19] minor

---
 recipes/llm-voice-assistant/python/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md
index 8863ec4..9081ce6 100644
--- a/recipes/llm-voice-assistant/python/README.md
+++ b/recipes/llm-voice-assistant/python/README.md
@@ -39,7 +39,7 @@ To see all available options, type the following:
 python main.py --help
 ```
 
-## Custom Wake Word (Optional)
+## Custom Wake Word
 
 The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
 application using the `--keyword_model_path` argument.

From f4598b52df9e47c44b70af09e16400714f1d9849 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 18:13:55 -0700
Subject: [PATCH 12/19] fix

---
 recipes/llm-voice-assistant/python/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 4657101..ed03197 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,5 @@
 numpy
+picollm==1.0.0
 pvcheetah==2.0.1
 pvorca==0.2.1
 pvporcupine==3.0.2

From e2e576c2abb35f200c9b97b06ddbecd0a278bfff Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 11:20:50 -0700
Subject: [PATCH 13/19] rev

---
 recipes/llm-voice-assistant/README.md        |  2 +-
 recipes/llm-voice-assistant/python/README.md |  2 +-
 recipes/llm-voice-assistant/python/main.py   | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/recipes/llm-voice-assistant/README.md b/recipes/llm-voice-assistant/README.md
index ef7c508..958d7ac 100644
--- a/recipes/llm-voice-assistant/README.md
+++ b/recipes/llm-voice-assistant/README.md
@@ -6,7 +6,7 @@ Hands-free voice assistant powered by a large language model (LLM); all voice re
 
 - [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/)
 - [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/)
-- [picoLLM Inference Engine]()
+- [picoLLM Inference Engine](https://github.com/Picovoice/picollm)
 - [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/)
 
 ## Implementations
diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md
index 9081ce6..d8bb6de 100644
--- a/recipes/llm-voice-assistant/python/README.md
+++ b/recipes/llm-voice-assistant/python/README.md
@@ -1,7 +1,7 @@
 ## Compatibility
 
 - Python 3.8+
-- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3).
+- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5 and 4).
 
 ## AccessKey
 
diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index fb8803a..75b04cd 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -25,10 +25,10 @@ def __init__(self, sample_rate: int) -> None:
         self._tick_sec = 0.
 
     def tick(self) -> None:
-        self._tick_sec = time.time()
+        self._tick_sec = time.perf_counter()
 
     def tock(self, audio: Optional[Sequence[int]] = None) -> None:
-        self._compute_sec += time.time() - self._tick_sec
+        self._compute_sec += time.perf_counter() - self._tick_sec
         self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0.
 
     def rtf(self) -> float:
@@ -45,12 +45,12 @@ def __init__(self) -> None:
 
     def tock(self) -> None:
         if self._start_sec == 0.:
-            self._start_sec = time.time()
+            self._start_sec = time.perf_counter()
         else:
             self._num_tokens += 1
 
     def tps(self) -> float:
-        tps = self._num_tokens / (time.time() - self._start_sec)
+        tps = self._num_tokens / (time.perf_counter() - self._start_sec)
         self._num_tokens = 0
         self._start_sec = 0.
         return tps
@@ -102,7 +102,7 @@ def buffer_pcm(x: Optional[Sequence[int]]) -> None:
         if x is not None:
             pcm_buffer.extend(x)
             if delay_sec[0] == -1:
-                delay_sec[0] = time.time() - utterance_end_sec
+                delay_sec[0] = time.perf_counter() - utterance_end_sec
 
     while True:
         if synthesize and len(texts) > 0:
@@ -294,7 +294,7 @@ def handler(_, __) -> None:
                 print(partial_transcript, end='', flush=True)
                 user_request += partial_transcript
                 if endpoint_reached:
-                    utterance_end_sec = time.time()
+                    utterance_end_sec = time.perf_counter()
                     cheetah_profiler.tick()
                     remaining_transcript = cheetah.flush()
                     cheetah_profiler.tock()

From c6dea23141af2d9fa0623ac3c66619bad86f6d94 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 12:54:44 -0700
Subject: [PATCH 14/19] eos

---
 recipes/llm-voice-assistant/python/main.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 75b04cd..c6f1209 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -307,11 +307,23 @@ def handler(_, __) -> None:
 
             picollm_profiler = TPSProfiler()
 
+            eos_tokens = {
+                '<end_of_turn>',  # Gemma
+                '</s>',  # Llama-2, Mistral, and Mixtral
+                '<|eot_id|>',  # Llama-3
+                'Human: ',  # Phi-2
+                'Instruct: ',  # Phi-2
+            }
+
+            completion = ['']
+
             def llm_callback(text: str) -> None:
                 picollm_profiler.tock()
-                main_connection.send(
-                    {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec})
-                print(text, end='', flush=True)
+                completion[0] += text
+                if not any(x in completion[0] for x in eos_tokens):
+                    main_connection.send(
+                        {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec})
+                    print(text, end='', flush=True)
 
             print("\nLLM > ", end='', flush=True)
             res = pllm.generate(

From 3bd9daa682a552477c6f6185cf38ead0511bac95 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 14:37:56 -0700
Subject: [PATCH 15/19] rev

---
 recipes/llm-voice-assistant/python/main.py | 6 +++++-
 res/.lint/spell-check/dict.txt             | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index c6f1209..c6a7a07 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -217,6 +217,7 @@ def main() -> None:
         help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower "
              "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.")
     parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
+    parser.add_argument('--short_answers', action='store_true')
    args = parser.parse_args()
 
     access_key = args.access_key
@@ -231,6 +232,7 @@ def main() -> None:
     picollm_top_p = args.picollm_top_p
     orca_warmup_sec = args.orca_warmup_sec
     profile = args.profile
+    short_answers = args.short_answers
 
     if keyword_model_path is None:
         porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
@@ -303,7 +305,9 @@ def handler(_, __) -> None:
                 if profile:
                     print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
             else:
-                dialog.add_human_request(user_request)
+                dialog.add_human_request(
+                    f"Provide a short answer to the following question. {user_request}" if short_answers
{user_request}" if short_answers + else user_request) picollm_profiler = TPSProfiler() diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index c2d9a00..f6105db 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,5 +1,6 @@ dtype logit +mixtral numpy pico picollm From 44ae2ee5dc6cd4ed93eaa4ad3eb83fb2fdb5c4f5 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Tue, 21 May 2024 14:44:01 -0700 Subject: [PATCH 16/19] rev --- recipes/llm-voice-assistant/python/main.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index c6a7a07..26dbe0b 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -240,7 +240,10 @@ def main() -> None: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) print(f"→ Porcupine V{porcupine.version}") - cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) + cheetah = pvcheetah.create( + access_key=access_key, + endpoint_duration_sec=cheetah_endpoint_duration_sec, + enable_automatic_punctuation=True) print(f"→ Cheetah V{cheetah.version}") pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) @@ -312,11 +315,10 @@ def handler(_, __) -> None: picollm_profiler = TPSProfiler() eos_tokens = { - '', # Gemma '', # Llama-2, Mistral, and Mixtral + '', # Gemma + '<|endoftext|>', # Phi-2 '<|eot_id|>', # Llama-3 - 'Human: ', # Phi-2 - 'Instruct: ', # Phi-2 } completion = [''] From e638e4775712391930719031b8c8a99daf8d79cb Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Tue, 21 May 2024 14:49:17 -0700 Subject: [PATCH 17/19] fix --- recipes/llm-voice-assistant/python/main.py | 1 + res/.lint/spell-check/dict.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 26dbe0b..d0f509c 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -181,6 +181,7 @@ def main() -> None: parser.add_argument( '--picollm_completion_token_limit', type=int, + default=256, help="Maximum number of tokens in the completion. Set to `None` to impose no limit.") parser.add_argument( '--picollm_presence_penalty', diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index f6105db..1893aea 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,4 +1,5 @@ dtype +endoftext logit mixtral numpy From 5e7d26c79d043c858657c2fd92fe1db964cd1934 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Wed, 22 May 2024 09:36:05 -0700 Subject: [PATCH 18/19] rev --- recipes/llm-voice-assistant/python/main.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index d0f509c..a10fdeb 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -309,13 +309,14 @@ def handler(_, __) -> None: if profile: print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" dialog.add_human_request( - f"Provide a short answer to the following question. 
{user_request}" if short_answers - else user_request) + f"{short_answers_instruction}. {user_request}" if short_answers else user_request) picollm_profiler = TPSProfiler() - eos_tokens = { + stop_phrases = { '', # Llama-2, Mistral, and Mixtral '', # Gemma '<|endoftext|>', # Phi-2 @@ -327,15 +328,18 @@ def handler(_, __) -> None: def llm_callback(text: str) -> None: picollm_profiler.tock() completion[0] += text - if not any(x in completion[0] for x in eos_tokens): - main_connection.send( - {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) + if not any(x in completion[0] for x in stop_phrases): + main_connection.send({ + 'command': 'synthesize', + 'text': text.replace('\n', ' . '), + 'utterance_end_sec': utterance_end_sec}) print(text, end='', flush=True) print("\nLLM > ", end='', flush=True) res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, + stop_phrases=stop_phrases, presence_penalty=picollm_presence_penalty, frequency_penalty=picollm_frequency_penalty, temperature=picollm_temperature, From 385b37750b68adab9da6fec7f1e19241202621d4 Mon Sep 17 00:00:00 2001 From: bejager <131197468+bejager@users.noreply.github.com> Date: Fri, 24 May 2024 14:30:12 -0700 Subject: [PATCH 19/19] Update requirements.txt --- recipes/llm-voice-assistant/python/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt index ed03197..f911465 100644 --- a/recipes/llm-voice-assistant/python/requirements.txt +++ b/recipes/llm-voice-assistant/python/requirements.txt @@ -1,7 +1,7 @@ numpy picollm==1.0.0 pvcheetah==2.0.1 -pvorca==0.2.1 +pvorca==0.2.2 pvporcupine==3.0.2 pvrecorder==1.2.2 -sounddevice \ No newline at end of file +sounddevice