diff --git a/recipes/llm-voice-assistant/nodejs/index.js b/recipes/llm-voice-assistant/nodejs/index.js index 140b203..df1898e 100755 --- a/recipes/llm-voice-assistant/nodejs/index.js +++ b/recipes/llm-voice-assistant/nodejs/index.js @@ -3,7 +3,7 @@ const { Porcupine, BuiltinKeyword } = require('@picovoice/porcupine-node'); const { Cheetah } = require('@picovoice/cheetah-node'); -const { PicoLLM } = require('@picovoice/picollm-node'); +const { PicoLLM, PicoLLMEndpoint } = require('@picovoice/picollm-node'); const { Orca } = require('@picovoice/orca-node'); const { PvRecorder } = require('@picovoice/pvrecorder-node'); const { PvSpeaker } = require('@picovoice/pvspeaker-node'); @@ -156,16 +156,16 @@ async function llmVoiceAssistant() { const isShortAnswers = program.short_answers; let porcupine = new Porcupine(accessKey, [keywordModelPath ?? BuiltinKeyword.PICOVOICE], [0.5]); - process.stdout.write(`\n→ Porcupine V${porcupine.version}`); + process.stdout.write(`\n→ Porcupine v${porcupine.version}`); const cheetah = new Cheetah(accessKey, { endpointDurationSec, enableAutomaticPunctuation: true }); - process.stdout.write(`\n→ Cheetah V${cheetah.version}`); + process.stdout.write(`\n→ Cheetah v${cheetah.version}`); const pllm = new PicoLLM(accessKey, picollmModelPath, { device: picollmDevice }); - process.stdout.write(`\n→ picoLLM V${pllm.version} <${pllm.model}>`); + process.stdout.write(`\n→ picoLLM v${pllm.version} <${pllm.model}>`); const orca = new Orca(accessKey); - process.stdout.write(`\n→ Orca V${orca.version}`); + process.stdout.write(`\n→ Orca v${orca.version}`); const dialog = pllm.getDialog(); const orcaStream = orca.streamOpen(); @@ -186,9 +186,25 @@ async function llmVoiceAssistant() { const orcaProfiler = new RTFProfiler(orca.sampleRate); let utteranceEndMillisec = 0; let delaySec = -1; + let picollmProfiler = new TPSProfiler(); + let isListeningForInterrupt = false; + let isInterruptingGeneration = false; + let completion = ''; + let isStartedPlaying = 
false; + const stopPhrases = [ + '</s>', // Llama-2, Mistral, and Mixtral + '<end_of_turn>', // Gemma + '<|endoftext|>', // Phi-2 + '<|eot_id|>', // Llama-3 + '<|end|>', '<|user|>', '<|assistant|>' // Phi-3 + ]; function handleLlmText(text, isStartedPlaying) { + if (isInterruptingGeneration) { + return false; + } process.stdout.write(text); + orcaProfiler.tick(); const pcm = orcaStream.synthesize(text.replace('\n', ' . ')); orcaProfiler.tock(pcm); @@ -211,6 +227,61 @@ async function llmVoiceAssistant() { return false; } + function onGenerateComplete(res) { + dialog.addLLMResponse(res.completion); + completion = ''; + if (profile) { + process.stdout.write(`\n[picoLLM TPS: ${picollmProfiler.tps()}]`); + } + + orcaProfiler.tick(); + const flushedPcm = orcaStream.flush(); + orcaProfiler.tock(flushedPcm); + if (profile) { + process.stdout.write(`\n[Orca RTF: ${orcaProfiler.rtf()}]`); + process.stdout.write(`\n[Delay: ${delaySec.toFixed(3)} sec]`); + } + + if (res.endpoint === PicoLLMEndpoint.INTERRUPTED) { + isWakeWordDetected = true; + process.stdout.write('\n\n$ Wake word detected, utter your request or question ...'); + process.stdout.write('\n\nUser > '); + } else { + if (flushedPcm !== null) { + pcmBuffer.push(...flushedPcm); + } + + const arrayBuffer = new Int16Array(pcmBuffer).buffer; + speaker.flush(arrayBuffer); + + isWakeWordDetected = false; + process.stdout.write(`\n${ppnPrompt}\n`); + } + speaker.stop(); + delaySec = -1; + + isEndpointReached = false; + userRequest = ''; + pcmBuffer = []; + + isListeningForInterrupt = false; + isInterruptingGeneration = false; + } + + function streamCallback(text) { + if (!isInterruptingGeneration) { + picollmProfiler.tock(); + completion += text; + if (!stopPhrases.some(x => completion.includes(x))) { + isStartedPlaying = handleLlmText(text, isStartedPlaying); + } + } + } + + function sleep(milliSec) { + return new Promise(resolve => setTimeout(resolve, milliSec)); + } + try { while (!isInterrupted) { if (!isWakeWordDetected) { @@ -249,20 
+320,13 @@ async function llmVoiceAssistant() { speaker.start(); - const picollmProfiler = new TPSProfiler(); - - const stopPhrases = [ - '</s>', // Llama-2, Mistral, and Mixtral - '<end_of_turn>', // Gemma - '<|endoftext|>', // Phi-2 - '<|eot_id|>', // Llama-3 - ]; + picollmProfiler = new TPSProfiler(); - let completion = ''; - let isStartedPlaying = false; + completion = ''; + isStartedPlaying = false; - process.stdout.write(`\nLLM >`); - const res = await pllm.generate( + process.stdout.write(`\nLLM (say ${keywordModelPath ? 'the wake word' : '`Picovoice`'} to interrupt) >`); + pllm.generate( dialog.prompt(), { completionTokenLimit: picollmCompletionTokenLimit, @@ -271,42 +335,25 @@ async function llmVoiceAssistant() { frequencyPenalty: picollmFrequencyPenalty, temperature: picollmTemperature, topP: picollmTopP, - streamCallback: text => { - picollmProfiler.tock(); - completion += text; - if (!stopPhrases.some(x => completion.includes(x))) { - isStartedPlaying = handleLlmText(text, isStartedPlaying); - } - }, + streamCallback: streamCallback, }, - ); - - dialog.addLLMResponse(res.completion); - if (profile) { - process.stdout.write(`\n[picoLLM TPS: ${picollmProfiler.tps()}]`); - } - - orcaProfiler.tick(); - const flushedPcm = orcaStream.flush(); - orcaProfiler.tock(flushedPcm); - if (profile) { - process.stdout.write(`\n[Orca RTF: ${orcaProfiler.rtf()}]`); - process.stdout.write(`\n[Delay: ${delaySec.toFixed(3)} sec]`); - } - if (flushedPcm !== null) { - pcmBuffer.push(...flushedPcm); + ).then(onGenerateComplete); + + isListeningForInterrupt = true; + isInterruptingGeneration = false; + while (isListeningForInterrupt) { + const pcm = await recorder.read(); + porcupineProfiler.tick(); + isWakeWordDetected = porcupine.process(pcm) === 0; + porcupineProfiler.tock(pcm); + if (isWakeWordDetected) { + isInterruptingGeneration = true; + pllm.interrupt(); + while (isInterruptingGeneration) { + await sleep(500); + } + } } - - const arrayBuffer = new Int16Array(pcmBuffer).buffer; - 
speaker.flush(arrayBuffer); - speaker.stop(); - delaySec = -1; - - isWakeWordDetected = false; - isEndpointReached = false; - userRequest = ''; - pcmBuffer = []; - process.stdout.write(`\n${ppnPrompt}\n`); } } } finally { diff --git a/recipes/llm-voice-assistant/nodejs/package.json b/recipes/llm-voice-assistant/nodejs/package.json index 56673f6..edcc386 100644 --- a/recipes/llm-voice-assistant/nodejs/package.json +++ b/recipes/llm-voice-assistant/nodejs/package.json @@ -16,7 +16,7 @@ "dependencies": { "@picovoice/cheetah-node": "^2.0.2", "@picovoice/orca-node": "^1.0.0", - "@picovoice/picollm-node": "=1.0.2", + "@picovoice/picollm-node": "1.1.0", "@picovoice/porcupine-node": "^3.0.3", "@picovoice/pvrecorder-node": "^1.2.3", "@picovoice/pvspeaker-node": "^1.0.1", diff --git a/recipes/llm-voice-assistant/nodejs/yarn.lock b/recipes/llm-voice-assistant/nodejs/yarn.lock index 2ad0810..f38cd36 100644 --- a/recipes/llm-voice-assistant/nodejs/yarn.lock +++ b/recipes/llm-voice-assistant/nodejs/yarn.lock @@ -84,10 +84,10 @@ resolved "https://registry.yarnpkg.com/@picovoice/orca-node/-/orca-node-1.0.0.tgz#812728c3183a914eff6b3189dfa958ef4d44f2f7" integrity sha512-YDTqJ5KsueBC4Nj0Zo287VF+/y7SRjXbOyHy8h66joJYPF0QNsz8oDCzbQO7hzymNbkFXd0crMPK+gQElvd83w== -"@picovoice/picollm-node@=1.0.2": - version "1.0.2" - resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.0.2.tgz#7262687eb6f0729af0e3ac7def495ccad092d525" - integrity sha512-VfJ5cVcF70BLRBCCyRsY2jFJXrXVpQqITT6Denr9Nd1poVzFk7x3KlJN8UtscXbKuqM+xfV1Jot5UVs9z+cj5g== +"@picovoice/picollm-node@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@picovoice/picollm-node/-/picollm-node-1.1.0.tgz#8f97ea5e46428af8a902dba0f264724d66999a96" + integrity sha512-vyVdHT/xQBy8LM1VAfxULivNS9gPIypIpvXE6r29IRKw8+VZtVQSv8c2+2O4qLiXD5FUIz8ErOa0A3rhAZHwyQ== "@picovoice/porcupine-node@^3.0.3": version "3.0.3"