From 669b0c56b001c1d48f1716af1769a65e6c2d18d2 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Sat, 11 May 2024 17:02:15 -0700 Subject: [PATCH 01/19] voice --- recipes/voice-llm/python/main.py | 375 ++++++++++++++++++++++ recipes/voice-llm/python/requirements.txt | 6 + 2 files changed, 381 insertions(+) create mode 100644 recipes/voice-llm/python/main.py create mode 100644 recipes/voice-llm/python/requirements.txt diff --git a/recipes/voice-llm/python/main.py b/recipes/voice-llm/python/main.py new file mode 100644 index 0000000..8bdc08f --- /dev/null +++ b/recipes/voice-llm/python/main.py @@ -0,0 +1,375 @@ +import signal +import time +from argparse import ArgumentParser +from enum import Enum +from multiprocessing import ( + Pipe, + Process, +) +from typing import ( + Optional, + Sequence, +) + +import picollm +import pvcheetah +import pvorca +import pvporcupine +from pvrecorder import PvRecorder + + +class Logger: + class Levels(Enum): + DEBUG = 'DEBUG' + INFO = 'INFO' + + def __init__(self, level: 'Logger.Levels' = Levels.INFO) -> None: + self._level = level + + def debug(self, message: str, end: str = '\n') -> None: + if self._level is self.Levels.DEBUG: + print(message, end=end, flush=True) + + # noinspection PyMethodMayBeStatic + def info(self, message: str, end: str = '\n') -> None: + print(message, end=end, flush=True) + + +class RTFProfiler: + def __init__(self, sample_rate: int) -> None: + self._sample_rate = sample_rate + self._compute_sec = 0. + self._audio_sec = 0. + self._tick_sec = 0. + + def tick(self) -> None: + self._tick_sec = time.time() + + def tock(self, audio: Optional[Sequence[int]] = None) -> None: + self._compute_sec += time.time() - self._tick_sec + self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0. + + def rtf(self) -> float: + rtf = self._compute_sec / self._audio_sec + self._compute_sec = 0. + self._audio_sec = 0. + return rtf + + +class TPSProfiler(object): + def __init__(self) -> None: + self._num_tokens = 0 + self._start_sec = 0. + + def tock(self) -> None: + if self._start_sec == 0.: + self._start_sec = time.time() + else: + self._num_tokens += 1 + + def tps(self) -> float: + tps = self._num_tokens / (time.time() - self._start_sec) + self._num_tokens = 0 + self._start_sec = 0. + return tps + + +def orca_worker(access_key: str, connection, warmup_sec: float, stream_frame_sec: int = 0.03) -> None: + # noinspection PyUnresolvedReferences + import numpy as np + from sounddevice import OutputStream + + orca = pvorca.create(access_key=access_key) + orca_stream = orca.stream_open() + + texts = list() + pcm_buffer = list() + warmup = [False] + synthesize = False + flush = False + close = False + utterance_end_sec = 0. + delay_sec = [-1.] 
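An aside on the two profilers defined in this first patch: both are driven in `tick()`/`tock()` pairs, and each read-out (`rtf()`, `tps()`) resets the accumulators. Below is a minimal, self-contained usage sketch; the 16 kHz sample rate, the silent frame, and the synthetic token stream are illustrative assumptions, not part of the recipe.

```python
import time

from main import RTFProfiler, TPSProfiler  # assumes main.py and its dependencies are importable

# Real-time factor: compute seconds spent per second of audio processed.
rtf_profiler = RTFProfiler(sample_rate=16000)
frame = [0] * 512                        # one synthetic frame of silence
rtf_profiler.tick()                      # start the compute timer
_ = sum(frame)                           # stand-in for e.g. porcupine.process(frame)
rtf_profiler.tock(frame)                 # stop the timer and credit 512 / 16000 sec of audio
print(f"RTF: {rtf_profiler.rtf():.4f}")  # reading the RTF resets both accumulators

# Tokens per second: the first tock() starts the clock; each later tock() counts a token.
tps_profiler = TPSProfiler()
for _ in range(33):                      # first call starts the clock, then 32 synthetic tokens
    time.sleep(0.01)
    tps_profiler.tock()
print(f"TPS: {tps_profiler.tps():.1f}")  # roughly 100 tokens per second for this stream
```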
+ + def callback(data, _, __, ___) -> None: + if warmup[0]: + if len(pcm_buffer) < int(warmup_sec * orca.sample_rate): + data[:, 0] = 0 + return + else: + warmup[0] = False + + if len(pcm_buffer) < data.shape[0]: + pcm_buffer.extend([0] * (data.shape[0] - len(pcm_buffer))) + + data[:, 0] = pcm_buffer[:data.shape[0]] + del pcm_buffer[:data.shape[0]] + + stream = OutputStream( + samplerate=orca.sample_rate, + blocksize=int(stream_frame_sec * orca.sample_rate), + channels=1, + dtype='int16', + callback=callback) + + connection.send({'version': orca.version}) + + orca_profiler = RTFProfiler(orca.sample_rate) + + def buffer_pcm(x: Optional[Sequence[int]]) -> None: + if x is not None: + pcm_buffer.extend(x) + if delay_sec[0] == -1: + delay_sec[0] = time.time() - utterance_end_sec + + while True: + if synthesize and len(texts) > 0: + orca_profiler.tick() + pcm = orca_stream.synthesize(texts.pop(0)) + orca_profiler.tock(pcm) + buffer_pcm(pcm) + elif flush: + while len(texts) > 0: + orca_profiler.tick() + pcm = orca_stream.synthesize(texts.pop(0)) + orca_profiler.tock(pcm) + buffer_pcm(pcm) + orca_profiler.tick() + pcm = orca_stream.flush() + orca_profiler.tock(pcm) + buffer_pcm(pcm) + connection.send({'rtf': orca_profiler.rtf(), 'delay': delay_sec[0]}) + flush = False + while len(pcm_buffer) > 0: + time.sleep(stream_frame_sec) + stream.stop() + delay_sec[0] = -1 + connection.send({'done': True}) + elif close: + break + else: + time.sleep(stream_frame_sec) + + while connection.poll(): + message = connection.recv() + if message['command'] == 'synthesize': + texts.append(message['text']) + if not stream.active: + stream.start() + warmup[0] = True + utterance_end_sec = message['utterance_end_sec'] + synthesize = True + elif message['command'] == 'flush': + synthesize = False + flush = True + elif message['command'] == 'close': + close = True + + stream.close() + orca_stream.close() + orca.delete() + + +def main() -> None: + parser = ArgumentParser() + parser.add_argument( + '--access_key', + required=True, + help='`AccessKey` obtained from `Picovoice Console` (https://console.picovoice.ai/).') + parser.add_argument( + '--picollm_model_path', + required=True, + help='Absolute path to the file containing LLM parameters.') + parser.add_argument( + '--keyword-model_path', + help='Absolute path to the keyword model file. If not set, `Picovoice` will be used as the wake phrase') + parser.add_argument( + '--cheetah_endpoint_duration_sec', + type=float, + default=1., + help="Duration of silence (pause) after the user's utterance to consider it the end of the utterance.") + parser.add_argument( + '--picollm_device', + help="String representation of the device (e.g., CPU or GPU) to use for inference. If set to `best`, picoLLM " + "picks the most suitable device. If set to `gpu`, the engine uses the first available GPU device. To " + "select a specific GPU device, set this argument to `gpu:${GPU_INDEX}`, where `${GPU_INDEX}` is the index " + "of the target GPU. If set to `cpu`, the engine will run on the CPU with the default number of threads. " + "To specify the number of threads, set this argument to `cpu:${NUM_THREADS}`, where `${NUM_THREADS}` is " + "the desired number of threads.") + parser.add_argument( + '--picollm_completion_token_limit', + type=int, + help="Maximum number of tokens in the completion. 
Set to `None` to impose no limit.") + parser.add_argument( + '--picollm_presence_penalty', + type=float, + default=0., + help="It penalizes logits already appearing in the partial completion if set to a positive value. If set to " + "`0.0`, it has no effect.") + parser.add_argument( + '--picollm_frequency_penalty', + type=float, + default=0., + help="If set to a positive floating-point value, it penalizes logits proportional to the frequency of their " + "appearance in the partial completion. If set to `0.0`, it has no effect.") + parser.add_argument( + '--picollm_temperature', + type=float, + default=0., + help="Sampling temperature. Temperature is a non-negative floating-point value that controls the randomness of " + "the sampler. A higher temperature smoothens the samplers' output, increasing the randomness. In " + "contrast, a lower temperature creates a narrower distribution and reduces variability. Setting it to " + "`0` selects the maximum logit during sampling.") + parser.add_argument( + '--picollm_top_p', + type=float, + default=1., + help="A positive floating-point number within (0, 1]. It restricts the sampler's choices to high-probability " + "logits that form the `top_p` portion of the probability mass. Hence, it avoids randomly selecting " + "unlikely logits. A value of `1.` enables the sampler to pick any token with non-zero probability, " + "turning off the feature.") + parser.add_argument( + '--orca_warmup_sec', + type=float, + default=0., + help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower " + "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") + parser.add_argument( + '--log_level', + choices=[x.value for x in Logger.Levels], + default=Logger.Levels.INFO.value, + help='Log level verbosity.') + args = parser.parse_args() + + access_key = args.access_key + picollm_model_path = args.picollm_model_path + keyword_model_path = args.keyword_model_path + cheetah_endpoint_duration_sec = args.cheetah_endpoint_duration_sec + picollm_device = args.picollm_device + picollm_completion_token_limit = args.picollm_completion_token_limit + picollm_presence_penalty = args.picollm_presence_penalty + picollm_frequency_penalty = args.picollm_frequency_penalty + picollm_temperature = args.picollm_temperature + picollm_top_p = args.picollm_top_p + orca_warmup_sec = args.orca_warmup_sec + log_level = Logger.Levels(args.log_level) + + log = Logger(log_level) + + if keyword_model_path is None: + porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) + else: + porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) + log.info(f"→ Porcupine V{porcupine.version}") + + cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) + log.info(f"→ Cheetah V{cheetah.version}") + + pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) + dialog = pllm.get_dialog() + log.info(f"→ picoLLM V{pllm.version} {pllm.model}") + + main_connection, orca_process_connection = Pipe() + orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) + orca_process.start() + while not main_connection.poll(): + time.sleep(0.01) + log.info(f"→ Orca V{main_connection.recv()['version']}") + + mic = PvRecorder(frame_length=porcupine.frame_length) + mic.start() + + log.info("\n$ Say `Picovoice` ...") + + stop = [False] + + def handler(_, __) -> None: + 
stop[0] = True + + signal.signal(signal.SIGINT, handler) + + wake_word_detected = False + human_request = '' + endpoint_reached = False + utterance_end_sec = 0 + + porcupine_profiler = RTFProfiler(porcupine.sample_rate) + cheetah_profiler = RTFProfiler(cheetah.sample_rate) + + try: + while True: + if stop[0]: + break + elif not wake_word_detected: + pcm = mic.read() + porcupine_profiler.tick() + wake_word_detected = porcupine.process(pcm) == 0 + porcupine_profiler.tock(pcm) + if wake_word_detected: + log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") + log.info("$ Wake word detected, utter your request or question ...\n") + log.info("human > ", end='') + elif not endpoint_reached: + pcm = mic.read() + cheetah_profiler.tick() + partial_transcript, endpoint_reached = cheetah.process(pcm) + cheetah_profiler.tock(pcm) + log.info(partial_transcript, end='') + human_request += partial_transcript + if endpoint_reached: + utterance_end_sec = time.time() + cheetah_profiler.tick() + remaining_transcript = cheetah.flush() + cheetah_profiler.tock() + human_request += remaining_transcript + log.info(remaining_transcript, end='\n\n') + log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") + else: + dialog.add_human_request(human_request) + + picollm_profiler = TPSProfiler() + + def llm_callback(text: str) -> None: + picollm_profiler.tock() + main_connection.send( + {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) + log.info(text, end='') + + log.info("\nllm > ", end='') + res = pllm.generate( + prompt=dialog.prompt(), + completion_token_limit=picollm_completion_token_limit, + presence_penalty=picollm_presence_penalty, + frequency_penalty=picollm_frequency_penalty, + temperature=picollm_temperature, + top_p=picollm_top_p, + stream_callback=llm_callback) + main_connection.send({'command': 'flush'}) + log.info('\n') + dialog.add_llm_response(res.completion) + log.debug(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") + + while not main_connection.poll(): + time.sleep(0.01) + message = main_connection.recv() + log.debug(f"[Orca RTF: {message['rtf']:.2f}]") + log.debug(f"[Delay: {message['delay']:.2f} sec]") + while not main_connection.poll(): + time.sleep(0.01) + assert main_connection.recv()['done'] + + wake_word_detected = False + human_request = '' + endpoint_reached = False + log.info("\n$ Say `Picovoice` ...") + finally: + main_connection.send({'command': 'close'}) + mic.delete() + pllm.release() + cheetah.delete() + porcupine.delete() + orca_process.join() + + +if __name__ == '__main__': + main() diff --git a/recipes/voice-llm/python/requirements.txt b/recipes/voice-llm/python/requirements.txt new file mode 100644 index 0000000..4657101 --- /dev/null +++ b/recipes/voice-llm/python/requirements.txt @@ -0,0 +1,6 @@ +numpy +pvcheetah==2.0.1 +pvorca==0.2.1 +pvporcupine==3.0.2 +pvrecorder==1.2.2 +sounddevice \ No newline at end of file From ca6bdf0ff09ded06b0678360e502b0bf97bd0995 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 14:29:06 -0700 Subject: [PATCH 02/19] fix --- res/.lint/spell-check/dict.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index f400b3f..c2d9a00 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,2 +1,14 @@ +dtype +logit +numpy pico -picovoice \ No newline at end of file +picollm +picovoice +pllm +pvcheetah +pvorca +pvporcupine +pvrecorder +samplerate +sounddevice +tock \ No 
newline at end of file

From e407cd2852b87e1d41191edd72b7ea1be72f10f4 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:29:52 -0700
Subject: [PATCH 03/19] clean

---
 recipes/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 recipes/.gitkeep

diff --git a/recipes/.gitkeep b/recipes/.gitkeep
deleted file mode 100644
index e69de29..0000000

From 8ab42a3236fb520fb081b8a90d6978137f25dcfe Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:34:17 -0700
Subject: [PATCH 04/19] readme

---
 README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a8c2de5..149db16 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,6 @@
-# pico-cookbook
\ No newline at end of file
+# Pico Cookbook
+
+Made in Vancouver, Canada by [Picovoice](https://picovoice.ai)
+
+[![Twitter URL](https://img.shields.io/twitter/url?label=%40AiPicovoice&style=social&url=https%3A%2F%2Ftwitter.com%2FAiPicovoice)](https://twitter.com/AiPicovoice)
+[![YouTube Channel Views](https://img.shields.io/youtube/channel/views/UCAdi9sTCXLosG1XeqDwLx7w?label=YouTube&style=social)](https://www.youtube.com/channel/UCAdi9sTCXLosG1XeqDwLx7w)

From e3d1c0be8dd2fec39115b2e7b684aeb750f46635 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:43:18 -0700
Subject: [PATCH 05/19] doc

---
 recipes/voice-llm/README.md        | 6 ++++++
 recipes/voice-llm/python/README.md | 4 ++++
 2 files changed, 10 insertions(+)
 create mode 100644 recipes/voice-llm/README.md
 create mode 100644 recipes/voice-llm/python/README.md

diff --git a/recipes/voice-llm/README.md b/recipes/voice-llm/README.md
new file mode 100644
index 0000000..cffba04
--- /dev/null
+++ b/recipes/voice-llm/README.md
@@ -0,0 +1,6 @@
+## Components
+
+- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/)
+- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/)
+- [picoLLM]()
+- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/)
\ No newline at end of file
diff --git a/recipes/voice-llm/python/README.md b/recipes/voice-llm/python/README.md
new file mode 100644
index 0000000..e5a696e
--- /dev/null
+++ b/recipes/voice-llm/python/README.md
@@ -0,0 +1,4 @@
+## Compatibility
+
+- Python 3.8+
+- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3).

From 0192975e27345c80eb3e7e9f351ff86aa9a1a7ba Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 14:55:49 -0700
Subject: [PATCH 06/19] rename

---
 recipes/llm-voice-assistant/README.md | 14 ++++++++++++++
 .../python/README.md                  |  0
 .../python/main.py                    |  0
 .../python/requirements.txt           |  0
 recipes/voice-llm/README.md           |  6 ------
 5 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 recipes/llm-voice-assistant/README.md
 rename recipes/{voice-llm => llm-voice-assistant}/python/README.md (100%)
 rename recipes/{voice-llm => llm-voice-assistant}/python/main.py (100%)
 rename recipes/{voice-llm => llm-voice-assistant}/python/requirements.txt (100%)
 delete mode 100644 recipes/voice-llm/README.md

diff --git a/recipes/llm-voice-assistant/README.md b/recipes/llm-voice-assistant/README.md
new file mode 100644
index 0000000..ef7c508
--- /dev/null
+++ b/recipes/llm-voice-assistant/README.md
@@ -0,0 +1,14 @@
+# LLM-Powered Voice Assistant
+
+Hands-free voice assistant powered by a large language model (LLM); all voice recognition, LLM inference, and speech synthesis run on-device.
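As an aside before the component list below: the recipe chains those four engines into one streaming loop, wake word to speech-to-text to LLM to text-to-speech. A pseudocode-level sketch of that flow, with `${ACCESS_KEY}` and `${PICOLLM_MODEL_PATH}` as placeholders as in the usage instructions; note the real `main.py` streams the reply through `orca.stream_open()` in a separate process rather than synthesizing it in one shot:

```python
import picollm
import pvcheetah
import pvorca
import pvporcupine
from pvrecorder import PvRecorder

access_key = '${ACCESS_KEY}'  # placeholder: AccessKey from Picovoice Console

porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
cheetah = pvcheetah.create(access_key=access_key)
pllm = picollm.create(access_key=access_key, model_path='${PICOLLM_MODEL_PATH}')
orca = pvorca.create(access_key=access_key)
dialog = pllm.get_dialog()

mic = PvRecorder(frame_length=porcupine.frame_length)  # Porcupine and Cheetah share 512-sample frames
mic.start()

while porcupine.process(mic.read()) == -1:  # 1. block until the wake word is heard
    pass

request, endpoint_reached = '', False
while not endpoint_reached:                 # 2. transcribe until the user pauses
    partial_transcript, endpoint_reached = cheetah.process(mic.read())
    request += partial_transcript
request += cheetah.flush()

dialog.add_human_request(request)           # 3. generate the reply with the LLM
res = pllm.generate(prompt=dialog.prompt())
dialog.add_llm_response(res.completion)

pcm = orca.synthesize(res.completion)       # 4. speak (return shape varies across pvorca versions)
# cleanup omitted: mic.delete(), pllm.release(), cheetah.delete(), porcupine.delete(), orca.delete()
```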
+ +## Components + +- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/) +- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/) +- [picoLLM Inference Engine]() +- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/) + +## Implementations + +- [Python](python) diff --git a/recipes/voice-llm/python/README.md b/recipes/llm-voice-assistant/python/README.md similarity index 100% rename from recipes/voice-llm/python/README.md rename to recipes/llm-voice-assistant/python/README.md diff --git a/recipes/voice-llm/python/main.py b/recipes/llm-voice-assistant/python/main.py similarity index 100% rename from recipes/voice-llm/python/main.py rename to recipes/llm-voice-assistant/python/main.py diff --git a/recipes/voice-llm/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt similarity index 100% rename from recipes/voice-llm/python/requirements.txt rename to recipes/llm-voice-assistant/python/requirements.txt diff --git a/recipes/voice-llm/README.md b/recipes/voice-llm/README.md deleted file mode 100644 index cffba04..0000000 --- a/recipes/voice-llm/README.md +++ /dev/null @@ -1,6 +0,0 @@ -## Components - -- [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/) -- [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/) -- [picoLLM]() -- [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/) \ No newline at end of file From bef778979bde300e3c5cd8ce418a5e1ecd4f3ac9 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 15:58:50 -0700 Subject: [PATCH 07/19] wip --- recipes/llm-voice-assistant/python/README.md | 27 ++++++++++++++++++++ recipes/llm-voice-assistant/python/main.py | 20 +++++++-------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index e5a696e..61d4652 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -2,3 +2,30 @@ - Python 3.8+ - Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3). + +## AccessKey + +## picoLLM Model + +## Custom Wake Word (Optional) + +## Usage + +```console +pip install -r requirements.txt +``` + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} +``` + +```console +python main.py --help +``` + +## Profiling + +```console +python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile +``` + diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 8bdc08f..ec17742 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -179,10 +179,10 @@ def main() -> None: parser.add_argument( '--picollm_model_path', required=True, - help='Absolute path to the file containing LLM parameters.') + help='Absolute path to the file containing LLM parameters (`.pllm`).') parser.add_argument( '--keyword-model_path', - help='Absolute path to the keyword model file. If not set, `Picovoice` will be used as the wake phrase') + help='Absolute path to the keyword model file (`.ppn`). 
If not set, `Picovoice` will be the wake phrase') parser.add_argument( '--cheetah_endpoint_duration_sec', type=float, @@ -267,7 +267,7 @@ def main() -> None: pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) dialog = pllm.get_dialog() - log.info(f"→ picoLLM V{pllm.version} {pllm.model}") + log.info(f"→ picoLLM V{pllm.version} <{pllm.model}>") main_connection, orca_process_connection = Pipe() orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) @@ -289,7 +289,7 @@ def handler(_, __) -> None: signal.signal(signal.SIGINT, handler) wake_word_detected = False - human_request = '' + user_request = '' endpoint_reached = False utterance_end_sec = 0 @@ -308,24 +308,24 @@ def handler(_, __) -> None: if wake_word_detected: log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") log.info("$ Wake word detected, utter your request or question ...\n") - log.info("human > ", end='') + log.info("User > ", end='') elif not endpoint_reached: pcm = mic.read() cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) log.info(partial_transcript, end='') - human_request += partial_transcript + user_request += partial_transcript if endpoint_reached: utterance_end_sec = time.time() cheetah_profiler.tick() remaining_transcript = cheetah.flush() cheetah_profiler.tock() - human_request += remaining_transcript + user_request += remaining_transcript log.info(remaining_transcript, end='\n\n') log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: - dialog.add_human_request(human_request) + dialog.add_human_request(user_request) picollm_profiler = TPSProfiler() @@ -335,7 +335,7 @@ def llm_callback(text: str) -> None: {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) log.info(text, end='') - log.info("\nllm > ", end='') + log.info("\nLLM > ", end='') res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, @@ -359,7 +359,7 @@ def llm_callback(text: str) -> None: assert main_connection.recv()['done'] wake_word_detected = False - human_request = '' + user_request = '' endpoint_reached = False log.info("\n$ Say `Picovoice` ...") finally: From cb5ff8042bf27c12803998941955901d8e48ffb1 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:06:41 -0700 Subject: [PATCH 08/19] wip --- recipes/llm-voice-assistant/python/README.md | 6 ++ recipes/llm-voice-assistant/python/main.py | 68 +++++++------------- 2 files changed, 30 insertions(+), 44 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index 61d4652..76bac47 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -25,6 +25,12 @@ python main.py --help ## Profiling +### Realtime Factor + +### Token per Second + +### Latency + ```console python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile ``` diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index ec17742..bc5d446 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -1,7 +1,6 @@ import signal import time from argparse import ArgumentParser -from enum import Enum from multiprocessing import ( Pipe, Process, @@ -18,23 +17,6 @@ from pvrecorder import PvRecorder -class Logger: - class Levels(Enum): - DEBUG 
= 'DEBUG' - INFO = 'INFO' - - def __init__(self, level: 'Logger.Levels' = Levels.INFO) -> None: - self._level = level - - def debug(self, message: str, end: str = '\n') -> None: - if self._level is self.Levels.DEBUG: - print(message, end=end, flush=True) - - # noinspection PyMethodMayBeStatic - def info(self, message: str, end: str = '\n') -> None: - print(message, end=end, flush=True) - - class RTFProfiler: def __init__(self, sample_rate: int) -> None: self._sample_rate = sample_rate @@ -234,11 +216,7 @@ def main() -> None: default=0., help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower " "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.") - parser.add_argument( - '--log_level', - choices=[x.value for x in Logger.Levels], - default=Logger.Levels.INFO.value, - help='Log level verbosity.') + parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.') args = parser.parse_args() access_key = args.access_key @@ -252,34 +230,32 @@ def main() -> None: picollm_temperature = args.picollm_temperature picollm_top_p = args.picollm_top_p orca_warmup_sec = args.orca_warmup_sec - log_level = Logger.Levels(args.log_level) - - log = Logger(log_level) + profile = args.profile if keyword_model_path is None: porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice']) else: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) - log.info(f"→ Porcupine V{porcupine.version}") + print(f"→ Porcupine V{porcupine.version}") cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) - log.info(f"→ Cheetah V{cheetah.version}") + print(f"→ Cheetah V{cheetah.version}") pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) dialog = pllm.get_dialog() - log.info(f"→ picoLLM V{pllm.version} <{pllm.model}>") + print(f"→ picoLLM V{pllm.version} <{pllm.model}>") main_connection, orca_process_connection = Pipe() orca_process = Process(target=orca_worker, args=(access_key, orca_process_connection, orca_warmup_sec)) orca_process.start() while not main_connection.poll(): time.sleep(0.01) - log.info(f"→ Orca V{main_connection.recv()['version']}") + print(f"→ Orca V{main_connection.recv()['version']}") mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - log.info("\n$ Say `Picovoice` ...") + print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") stop = [False] @@ -306,15 +282,16 @@ def handler(_, __) -> None: wake_word_detected = porcupine.process(pcm) == 0 porcupine_profiler.tock(pcm) if wake_word_detected: - log.debug(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") - log.info("$ Wake word detected, utter your request or question ...\n") - log.info("User > ", end='') + if profile: + print(f"[Porcupine RTF: {porcupine_profiler.rtf():.3f}]") + print("$ Wake word detected, utter your request or question ...\n") + print("User > ", end='', flush=True) elif not endpoint_reached: pcm = mic.read() cheetah_profiler.tick() partial_transcript, endpoint_reached = cheetah.process(pcm) cheetah_profiler.tock(pcm) - log.info(partial_transcript, end='') + print(partial_transcript, end='', flush=True) user_request += partial_transcript if endpoint_reached: utterance_end_sec = time.time() @@ -322,8 +299,9 @@ def handler(_, __) -> None: remaining_transcript = cheetah.flush() cheetah_profiler.tock() user_request += 
remaining_transcript - log.info(remaining_transcript, end='\n\n') - log.debug(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") + print(remaining_transcript, end='\n\n') + if profile: + print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: dialog.add_human_request(user_request) @@ -333,9 +311,9 @@ def llm_callback(text: str) -> None: picollm_profiler.tock() main_connection.send( {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) - log.info(text, end='') + print(text, end='', flush=True) - log.info("\nLLM > ", end='') + print("\nLLM > ", end='', flush=True) res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, @@ -345,15 +323,17 @@ def llm_callback(text: str) -> None: top_p=picollm_top_p, stream_callback=llm_callback) main_connection.send({'command': 'flush'}) - log.info('\n') + print('\n') dialog.add_llm_response(res.completion) - log.debug(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") + if profile: + print(f"[picoLLM TPS: {picollm_profiler.tps():.2f}]") while not main_connection.poll(): time.sleep(0.01) message = main_connection.recv() - log.debug(f"[Orca RTF: {message['rtf']:.2f}]") - log.debug(f"[Delay: {message['delay']:.2f} sec]") + if profile: + print(f"[Orca RTF: {message['rtf']:.2f}]") + print(f"[Delay: {message['delay']:.2f} sec]") while not main_connection.poll(): time.sleep(0.01) assert main_connection.recv()['done'] @@ -361,7 +341,7 @@ def llm_callback(text: str) -> None: wake_word_detected = False user_request = '' endpoint_reached = False - log.info("\n$ Say `Picovoice` ...") + print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") finally: main_connection.send({'command': 'close'}) mic.delete() From 395801015e508466afea69936362e859c8d226a1 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:11:52 -0700 Subject: [PATCH 09/19] minor --- recipes/llm-voice-assistant/python/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index bc5d446..fb8803a 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -255,7 +255,7 @@ def main() -> None: mic = PvRecorder(frame_length=porcupine.frame_length) mic.start() - print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") + print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") stop = [False] @@ -341,7 +341,7 @@ def llm_callback(text: str) -> None: wake_word_detected = False user_request = '' endpoint_reached = False - print(f"\n$ Say {'Picovoice' if keyword_model_path is None else 'the wake word'} ...") + print(f"\n$ Say {'`Picovoice`' if keyword_model_path is None else 'the wake word'} ...") finally: main_connection.send({'command': 'close'}) mic.delete() From 2b8a4c956a924cd9cbfdc3d7889bde1e44fb969a Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Thu, 16 May 2024 16:37:59 -0700 Subject: [PATCH 10/19] wip --- recipes/llm-voice-assistant/python/README.md | 47 +++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md index 76bac47..8863ec4 100644 --- a/recipes/llm-voice-assistant/python/README.md +++ b/recipes/llm-voice-assistant/python/README.md @@ -5,33 +5,70 @@ ## AccessKey +AccessKey is your authentication and authorization token for deploying Picovoice 
SDKs, including picoLLM. Anyone who is
+using Picovoice needs a valid AccessKey. You must keep your AccessKey secret. You need internet connectivity to
+validate your AccessKey with Picovoice license servers, even though the LLM inference runs 100% offline and is
+completely free for open-weight models. Everyone who signs up for
+[Picovoice Console](https://console.picovoice.ai/) receives a unique AccessKey.
+
 ## picoLLM Model
 
+picoLLM Inference Engine supports many open-weight models. The models are available on
+[Picovoice Console](https://console.picovoice.ai/).
+
-## Custom Wake Word (Optional)
-
 ## Usage
 
+Install the required packages:
+
 ```console
 pip install -r requirements.txt
 ```
 
+Run the demo:
+
 ```console
 python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH}
 ```
 
+Replace `${ACCESS_KEY}` with your AccessKey obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path
+to the model downloaded from Picovoice Console.
+
+To see all available options, type the following:
+
 ```console
 python main.py --help
 ```
 
+## Custom Wake Word (Optional)
+
+The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
+application using the `--keyword_model_path` argument.
+
 ## Profiling
 
-### Realtime Factor
-
-### Token per Second
-
-### Latency
+To see the runtime profiling metrics, run the demo with the `--profile` argument:
 
 ```console
 python3 main.py --access_key ${ACCESS_KEY} --picollm_model_path ${PICOLLM_MODEL_PATH} --profile
 ```
 
+Replace `${ACCESS_KEY}` with your AccessKey obtained from Picovoice Console and `${PICOLLM_MODEL_PATH}` with the path
+to the model downloaded from Picovoice Console.
+
+The demo profiles three metrics: Real-time Factor (RTF), Token per Second (TPS), and Latency.
+
+### Real-time Factor (RTF)
+
+RTF is a standard metric for measuring the speed of speech processing (e.g., wake word, speech-to-text, and
+text-to-speech). RTF is the CPU time divided by the processed (recognized or synthesized) audio length. Hence, a
+lower RTF means a more efficient engine.
+
+### Token per Second (TPS)
+
+Token per second is the standard metric for measuring the speed of LLM inference engines. TPS is the number of
+generated tokens divided by the compute time used to create them. A higher TPS is better.
+
+### Latency
+
+We measure the latency as the delay between the end of the user's utterance (i.e., the time when the user finishes
+talking) and the time that the voice assistant generates the first chunk of the audio response (i.e., when the user
+starts hearing the response).
+

From bffe1a4665a16cba695c46e0a8088500b0779626 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 16:38:42 -0700
Subject: [PATCH 11/19] minor

---
 recipes/llm-voice-assistant/python/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md
index 8863ec4..9081ce6 100644
--- a/recipes/llm-voice-assistant/python/README.md
+++ b/recipes/llm-voice-assistant/python/README.md
@@ -39,7 +39,7 @@ To see all available options, type the following:
 python main.py --help
 ```
 
-## Custom Wake Word (Optional)
+## Custom Wake Word
 
 The demo's default wake phrase is `Picovoice`. You can generate your custom (branded) wake word using Picovoice Console by following the [Porcupine Wake Word documentation](https://picovoice.ai/docs/porcupine/). Once you have the model trained, simply pass it to the demo
 application using the `--keyword_model_path` argument.

From f4598b52df9e47c44b70af09e16400714f1d9849 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Thu, 16 May 2024 18:13:55 -0700
Subject: [PATCH 12/19] fix

---
 recipes/llm-voice-assistant/python/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt
index 4657101..ed03197 100644
--- a/recipes/llm-voice-assistant/python/requirements.txt
+++ b/recipes/llm-voice-assistant/python/requirements.txt
@@ -1,4 +1,5 @@
 numpy
+picollm==1.0.0
 pvcheetah==2.0.1
 pvorca==0.2.1
 pvporcupine==3.0.2

From e2e576c2abb35f200c9b97b06ddbecd0a278bfff Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 11:20:50 -0700
Subject: [PATCH 13/19] rev

---
 recipes/llm-voice-assistant/README.md        |  2 +-
 recipes/llm-voice-assistant/python/README.md |  2 +-
 recipes/llm-voice-assistant/python/main.py   | 12 ++++++------
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/recipes/llm-voice-assistant/README.md b/recipes/llm-voice-assistant/README.md
index ef7c508..958d7ac 100644
--- a/recipes/llm-voice-assistant/README.md
+++ b/recipes/llm-voice-assistant/README.md
@@ -6,7 +6,7 @@ Hands-free voice assistant powered by a large language model (LLM); all voice re
 
 - [Porcupine Wake Word](https://picovoice.ai/docs/porcupine/)
 - [Cheetah Streaming Speech-to-Text](https://picovoice.ai/docs/cheetah/)
-- [picoLLM Inference Engine]()
+- [picoLLM Inference Engine](https://github.com/Picovoice/picollm)
 - [Orca Streaming Text-to-Speech](https://picovoice.ai/docs/orca/)
 
 ## Implementations
diff --git a/recipes/llm-voice-assistant/python/README.md b/recipes/llm-voice-assistant/python/README.md
index 9081ce6..d8bb6de 100644
--- a/recipes/llm-voice-assistant/python/README.md
+++ b/recipes/llm-voice-assistant/python/README.md
@@ -1,7 +1,7 @@
 ## Compatibility
 
 - Python 3.8+
-- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5, 4, and 3).
+- Runs on Linux (x86_64), macOS (arm64, x86_64), Windows (x86_64), and Raspberry Pi (5 and 4).
 
 ## AccessKey
 
diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index fb8803a..75b04cd 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -25,10 +25,10 @@ def __init__(self, sample_rate: int) -> None:
         self._tick_sec = 0.
 
     def tick(self) -> None:
-        self._tick_sec = time.time()
+        self._tick_sec = time.perf_counter()
 
     def tock(self, audio: Optional[Sequence[int]] = None) -> None:
-        self._compute_sec += time.time() - self._tick_sec
+        self._compute_sec += time.perf_counter() - self._tick_sec
         self._audio_sec += (len(audio) / self._sample_rate) if audio is not None else 0.
 
     def rtf(self) -> float:
@@ -45,12 +45,12 @@ def __init__(self) -> None:
 
     def tock(self) -> None:
         if self._start_sec == 0.:
-            self._start_sec = time.time()
+            self._start_sec = time.perf_counter()
         else:
             self._num_tokens += 1
 
     def tps(self) -> float:
-        tps = self._num_tokens / (time.time() - self._start_sec)
+        tps = self._num_tokens / (time.perf_counter() - self._start_sec)
         self._num_tokens = 0
         self._start_sec = 0.
         return tps
@@ -102,7 +102,7 @@ def buffer_pcm(x: Optional[Sequence[int]]) -> None:
         if x is not None:
             pcm_buffer.extend(x)
             if delay_sec[0] == -1:
-                delay_sec[0] = time.time() - utterance_end_sec
+                delay_sec[0] = time.perf_counter() - utterance_end_sec
 
     while True:
         if synthesize and len(texts) > 0:
@@ -294,7 +294,7 @@ def handler(_, __) -> None:
                 print(partial_transcript, end='', flush=True)
                 user_request += partial_transcript
                 if endpoint_reached:
-                    utterance_end_sec = time.time()
+                    utterance_end_sec = time.perf_counter()
                     cheetah_profiler.tick()
                     remaining_transcript = cheetah.flush()
                     cheetah_profiler.tock()

From c6dea23141af2d9fa0623ac3c66619bad86f6d94 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 12:54:44 -0700
Subject: [PATCH 14/19] eos

---
 recipes/llm-voice-assistant/python/main.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index 75b04cd..c6f1209 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -307,11 +307,23 @@ def handler(_, __) -> None:
 
             picollm_profiler = TPSProfiler()
 
+            eos_tokens = {
+                '<end_of_turn>',  # Gemma
+                '</s>',  # Llama-2, Mistral, and Mixtral
+                '<|eot_id|>',  # Llama-3
+                'Human: ',  # Phi-2
+                'Instruct: ',  # Phi-2
+            }
+
+            completion = ['']
+
             def llm_callback(text: str) -> None:
                 picollm_profiler.tock()
-                main_connection.send(
-                    {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec})
-                print(text, end='', flush=True)
+                completion[0] += text
+                if not any(x in completion[0] for x in eos_tokens):
+                    main_connection.send(
+                        {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec})
+                    print(text, end='', flush=True)
 
             print("\nLLM > ", end='', flush=True)
             res = pllm.generate(

From 3bd9daa682a552477c6f6185cf38ead0511bac95 Mon Sep 17 00:00:00 2001
From: Alireza Kenarsari
Date: Tue, 21 May 2024 14:37:56 -0700
Subject: [PATCH 15/19] rev

---
 recipes/llm-voice-assistant/python/main.py | 6 +++++-
 res/.lint/spell-check/dict.txt             | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py
index c6f1209..c6a7a07 100644
--- a/recipes/llm-voice-assistant/python/main.py
+++ b/recipes/llm-voice-assistant/python/main.py
@@ -217,6 +217,7 @@ def main() -> None:
         help="Duration of the synthesized audio to buffer before streaming it out. A higher value helps slower "
              "(e.g., Raspberry Pi) to keep up with real-time at the cost of increasing the initial delay.")
     parser.add_argument('--profile', action='store_true', help='Show runtime profiling information.')
+    parser.add_argument('--short_answers', action='store_true')
    args = parser.parse_args()
 
     access_key = args.access_key
@@ -231,6 +232,7 @@ def main() -> None:
     picollm_top_p = args.picollm_top_p
     orca_warmup_sec = args.orca_warmup_sec
     profile = args.profile
+    short_answers = args.short_answers
 
     if keyword_model_path is None:
         porcupine = pvporcupine.create(access_key=access_key, keywords=['picovoice'])
@@ -303,7 +305,9 @@ def handler(_, __) -> None:
                 if profile:
                     print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]")
             else:
-                dialog.add_human_request(user_request)
+                dialog.add_human_request(
+                    f"Provide a short answer to the following question. {user_request}" if short_answers
{user_request}" if short_answers + else user_request) picollm_profiler = TPSProfiler() diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index c2d9a00..f6105db 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,5 +1,6 @@ dtype logit +mixtral numpy pico picollm From 44ae2ee5dc6cd4ed93eaa4ad3eb83fb2fdb5c4f5 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Tue, 21 May 2024 14:44:01 -0700 Subject: [PATCH 16/19] rev --- recipes/llm-voice-assistant/python/main.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index c6a7a07..26dbe0b 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -240,7 +240,10 @@ def main() -> None: porcupine = pvporcupine.create(access_key=access_key, keyword_paths=[keyword_model_path]) print(f"→ Porcupine V{porcupine.version}") - cheetah = pvcheetah.create(access_key=access_key, endpoint_duration_sec=cheetah_endpoint_duration_sec) + cheetah = pvcheetah.create( + access_key=access_key, + endpoint_duration_sec=cheetah_endpoint_duration_sec, + enable_automatic_punctuation=True) print(f"→ Cheetah V{cheetah.version}") pllm = picollm.create(access_key=access_key, model_path=picollm_model_path, device=picollm_device) @@ -312,11 +315,10 @@ def handler(_, __) -> None: picollm_profiler = TPSProfiler() eos_tokens = { - '', # Gemma '', # Llama-2, Mistral, and Mixtral + '', # Gemma + '<|endoftext|>', # Phi-2 '<|eot_id|>', # Llama-3 - 'Human: ', # Phi-2 - 'Instruct: ', # Phi-2 } completion = [''] From e638e4775712391930719031b8c8a99daf8d79cb Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Tue, 21 May 2024 14:49:17 -0700 Subject: [PATCH 17/19] fix --- recipes/llm-voice-assistant/python/main.py | 1 + res/.lint/spell-check/dict.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index 26dbe0b..d0f509c 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -181,6 +181,7 @@ def main() -> None: parser.add_argument( '--picollm_completion_token_limit', type=int, + default=256, help="Maximum number of tokens in the completion. Set to `None` to impose no limit.") parser.add_argument( '--picollm_presence_penalty', diff --git a/res/.lint/spell-check/dict.txt b/res/.lint/spell-check/dict.txt index f6105db..1893aea 100644 --- a/res/.lint/spell-check/dict.txt +++ b/res/.lint/spell-check/dict.txt @@ -1,4 +1,5 @@ dtype +endoftext logit mixtral numpy From 5e7d26c79d043c858657c2fd92fe1db964cd1934 Mon Sep 17 00:00:00 2001 From: Alireza Kenarsari Date: Wed, 22 May 2024 09:36:05 -0700 Subject: [PATCH 18/19] rev --- recipes/llm-voice-assistant/python/main.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/recipes/llm-voice-assistant/python/main.py b/recipes/llm-voice-assistant/python/main.py index d0f509c..a10fdeb 100644 --- a/recipes/llm-voice-assistant/python/main.py +++ b/recipes/llm-voice-assistant/python/main.py @@ -309,13 +309,14 @@ def handler(_, __) -> None: if profile: print(f"[Cheetah RTF: {cheetah_profiler.rtf():.3f}]") else: + short_answers_instruction = \ + "You are a voice assistant and your answers are very short but informative" dialog.add_human_request( - f"Provide a short answer to the following question. 
{user_request}" if short_answers - else user_request) + f"{short_answers_instruction}. {user_request}" if short_answers else user_request) picollm_profiler = TPSProfiler() - eos_tokens = { + stop_phrases = { '', # Llama-2, Mistral, and Mixtral '', # Gemma '<|endoftext|>', # Phi-2 @@ -327,15 +328,18 @@ def handler(_, __) -> None: def llm_callback(text: str) -> None: picollm_profiler.tock() completion[0] += text - if not any(x in completion[0] for x in eos_tokens): - main_connection.send( - {'command': 'synthesize', 'text': text, 'utterance_end_sec': utterance_end_sec}) + if not any(x in completion[0] for x in stop_phrases): + main_connection.send({ + 'command': 'synthesize', + 'text': text.replace('\n', ' . '), + 'utterance_end_sec': utterance_end_sec}) print(text, end='', flush=True) print("\nLLM > ", end='', flush=True) res = pllm.generate( prompt=dialog.prompt(), completion_token_limit=picollm_completion_token_limit, + stop_phrases=stop_phrases, presence_penalty=picollm_presence_penalty, frequency_penalty=picollm_frequency_penalty, temperature=picollm_temperature, From 385b37750b68adab9da6fec7f1e19241202621d4 Mon Sep 17 00:00:00 2001 From: bejager <131197468+bejager@users.noreply.github.com> Date: Fri, 24 May 2024 14:30:12 -0700 Subject: [PATCH 19/19] Update requirements.txt --- recipes/llm-voice-assistant/python/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/llm-voice-assistant/python/requirements.txt b/recipes/llm-voice-assistant/python/requirements.txt index ed03197..f911465 100644 --- a/recipes/llm-voice-assistant/python/requirements.txt +++ b/recipes/llm-voice-assistant/python/requirements.txt @@ -1,7 +1,7 @@ numpy picollm==1.0.0 pvcheetah==2.0.1 -pvorca==0.2.1 +pvorca==0.2.2 pvporcupine==3.0.2 pvrecorder==1.2.2 -sounddevice \ No newline at end of file +sounddevice