From fcf0b8781551dbd9abed9a35285cdeb61cd78db2 Mon Sep 17 00:00:00 2001
From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com>
Date: Mon, 7 Oct 2024 01:53:19 -0500
Subject: [PATCH] node-llama-cpp v3 cli

---
 docs/examples.md           |  9 +++++
 src/commands/processRSS.ts | 15 ++++-----
 src/llms/llama.ts          | 68 ++++++++++++++++++++++----------
 test/all.test.js           | 30 +++++++++++++++++
 4 files changed, 84 insertions(+), 38 deletions(-)

diff --git a/docs/examples.md b/docs/examples.md
index 924c674..1ed1195 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -251,6 +251,15 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZAR
 npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama
 ```
 
+Select Llama model:
+
+```bash
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama GEMMA_2_2B
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama LLAMA_3_2_1B
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama PHI_3_5
+npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama QWEN_2_5_3B
+```
+
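+Each model name is a key in `LLAMA_MODELS`; when `--llama` is given without a value, the default model (`QWEN_2_5_3B`) is used.
+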
 ### Ollama
 
 ```bash
diff --git a/src/commands/processRSS.ts b/src/commands/processRSS.ts
index d1a49dc..1259aa7 100644
--- a/src/commands/processRSS.ts
+++ b/src/commands/processRSS.ts
@@ -142,10 +142,7 @@ export async function processRSS(
 
   // Extract channel and item information
   const {
-    title: channelTitle,
-    link: channelLink,
-    image: channelImageObject,
-    item: feedItems,
+    title: channelTitle, link: channelLink, image: channelImageObject, item: feedItems,
   } = feed.rss.channel
 
   // Extract channel image URL safely
@@ -213,13 +210,13 @@ export async function processRSS(
 
     // Process each item in the feed
     for (const [index, item] of itemsToProcess.entries()) {
-      log(opts(`\n==============================================================`))
-      log(opts(`  Item ${index + 1}/${itemsToProcess.length} processing: ${item.title}`))
-      log(opts(`==============================================================\n`))
+      log(opts(`\n========================================================================================`))
+      log(opts(`  Item ${index + 1}/${itemsToProcess.length} processing:\n\n${item.title}`))
+      log(opts(`========================================================================================\n`))
       await processItem(options, item, llmServices, transcriptServices)
-      log(final(`\n==============================================================`))
+      log(final(`\n========================================================================================`))
       log(final(`  ${index + 1}/${itemsToProcess.length} item processing completed successfully`))
-      log(final(`==============================================================\n`))
+      log(final(`========================================================================================\n`))
     }
   } catch (error) {
     console.error(`Error processing RSS feed: ${(error as Error).message}`)
diff --git a/src/llms/llama.ts b/src/llms/llama.ts
index 56cee8c..6dd89b7 100644
--- a/src/llms/llama.ts
+++ b/src/llms/llama.ts
@@ -1,58 +1,56 @@
 // src/llms/llama.ts
-import { writeFile, mkdir } from 'node:fs/promises'
-import { getLlama, LlamaChatSession } from "node-llama-cpp"
+import { writeFile } from 'node:fs/promises'
 import { existsSync } from 'node:fs'
-import { exec } from 'node:child_process'
-import { promisify } from 'node:util'
+import { resolve } from 'node:path'
 import { LLAMA_MODELS } from '../models.js'
 import { log, success, wait } from '../models.js'
+import { getLlama, LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp"
+import { createModelDownloader } from 'node-llama-cpp'
 import type { LlamaModelType, LLMFunction } from '../types.js'
 
-const execAsync = promisify(exec)
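+// Cache the loaded model and context at module scope so repeated calls in the
+// same run reuse them instead of reloading the model weights every time.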
+let model: LlamaModel | null = null
+let context: LlamaContext | null = null
 
 /**
- * Main function to call the local Llama model.
+ * Main function to call the local Llama model using the node-llama-cpp API.
  * @param promptAndTranscript - The combined prompt and transcript content.
  * @param tempPath - The temporary file path to write the LLM output.
- * @param model - The model name or undefined to use the default model.
+ * @param modelName - The model name or undefined to use the default model.
  * @returns A Promise that resolves when the processing is complete.
  * @throws {Error} - If an error occurs during processing.
  */
 export const callLlama: LLMFunction = async (
   promptAndTranscript: string,
   tempPath: string,
-  model?: string
+  modelName?: string
 ) => {
   try {
-    // Get the model object from LLAMA_MODELS using the provided model name or default to GEMMA_2_2B
-    const selectedModel = LLAMA_MODELS[model as LlamaModelType] || LLAMA_MODELS.GEMMA_2_2B
+    // Get the model object from LLAMA_MODELS using the provided model name or default to QWEN_2_5_3B
+    const selectedModel = LLAMA_MODELS[modelName as LlamaModelType] || LLAMA_MODELS.QWEN_2_5_3B
     log(wait(`    - filename: ${selectedModel.filename}\n    - url: ${selectedModel.url}\n`))
 
     // If no valid model is found, throw an error
     if (!selectedModel) {
-      throw new Error(`Invalid model name: ${model}`)
+      throw new Error(`Invalid model name: ${modelName}`)
     }
 
     // Construct the path where the model file should be stored
-    const modelPath = `./src/llms/models/${selectedModel.filename}`
+    const modelDir = resolve('./src/llms/models')
+    const modelPath = resolve(modelDir, selectedModel.filename)
 
     // Check if the model file already exists, if not, download it
     if (!existsSync(modelPath)) {
       log(success(`\nDownloading ${selectedModel.filename}...`))
       try {
-        // Create the directory for storing models if it doesn't exist
-        await mkdir('./src/llms/models', { recursive: true })
-
-        // Download the model using curl
-        const { stderr } = await execAsync(`curl -L ${selectedModel.url} -o ${modelPath}`)
-
-        // If there's any stderr output, log completed
-        if (stderr) log(success('Download completed'))
+        // Download the model with node-llama-cpp's built-in downloader
+        const downloader = await createModelDownloader({
+          modelUri: selectedModel.url,
+          dirPath: modelDir
+        })
+        await downloader.download()
+        log(success('Download completed'))
       } catch (err) {
-        // If an error occurs during download, log it and throw a new error
         console.error(`Download failed: ${err instanceof Error ? err.message : String(err)}`)
         throw new Error('Failed to download the model')
       }
@@ -60,17 +58,29 @@ export const callLlama: LLMFunction = async (
       log(wait(`  modelPath found:\n    - ${modelPath}`))
     }
 
-    // Initialize Llama and load the local model
-    const llama = await getLlama()
-    const localModel = await llama.loadModel({ modelPath })
+    // Initialize Llama and load the local model if not already loaded
+    if (!model || !context) {
+      const llama = await getLlama()
+      model = await llama.loadModel({ modelPath })
+      context = await model.createContext()
+    }
 
-    // Create a context for the model and create a chat session
-    const context = await localModel.createContext()
+    // Create a chat session
     const session = new LlamaChatSession({ contextSequence: context.getSequence() })
 
-    // Generate a response and write the response to a file
-    const response = await session.prompt(promptAndTranscript)
+    // Generate a response
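+    // temperature trades determinism for variety; topK and topP bound the
+    // pool of candidate tokens considered at each sampling step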
+    const response = await session.prompt(promptAndTranscript, {
+      maxTokens: -1,
+      temperature: 0.7,
+      topK: 40,
+      topP: 0.95,
+      // repeatPenalty: 1.1
+    })
+
+    // Write the response to the temporary file
     await writeFile(tempPath, response)
+
+    log(success('LLM processing completed'))
   } catch (error) {
     console.error(`Error in callLlama: ${error instanceof Error ? (error as Error).message : String(error)}`)
     throw error
diff --git a/test/all.test.js b/test/all.test.js
index a86828f..d3b7247 100644
--- a/test/all.test.js
+++ b/test/all.test.js
@@ -8,11 +8,13 @@ import { join } from 'node:path'
 
 const commands = [
   {
+    // Process a single YouTube video using Autoshow's default settings.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk"',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md',
     newName: '01---ep0-fsjam-podcast-prompt.md'
   },
   {
+    // Process all videos in a specified YouTube playlist.
     cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr"',
     expectedFiles: [
       { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '02A---ep1-fsjam-podcast-prompt.md' },
@@ -20,6 +22,7 @@ const commands = [
     ]
   },
   {
+    // Process playlist videos with custom title prompt, tiny Whisper model, and Llama for LLM processing.
     cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama',
     expectedFiles: [
       { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '03A---ep1-fsjam-podcast-llama-shownotes.md' },
@@ -27,6 +30,7 @@ const commands = [
     ]
   },
   {
+    // Process multiple YouTube videos from URLs listed in a file.
     cmd: 'npm run as -- --urls "content/example-urls.md"',
     expectedFiles: [
       { file: '2024-09-24-ep1-fsjam-podcast-prompt.md', newName: '04A---ep1-fsjam-podcast-prompt.md' },
@@ -34,6 +38,7 @@ const commands = [
     ]
   },
   {
+    // Process multiple YouTube videos from URLs with title prompts, Whisper 'tiny' model, and Llama.
     cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --llama',
     expectedFiles: [
       { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: '05A---ep1-fsjam-podcast-llama-shownotes.md' },
@@ -41,126 +46,151 @@ const commands = [
     ]
   },
   {
+    // Process a single local audio file.
     cmd: 'npm run as -- --file "content/audio.mp3"',
     expectedFile: 'audio-prompt.md',
     newName: '06---audio-prompt.md'
   },
   {
+    // Process local audio file with title prompts, Whisper 'tiny' model, and Llama.
     cmd: 'npm run as -- --file "content/audio.mp3" --prompt titles --whisper tiny --llama',
     expectedFile: 'audio-llama-shownotes.md',
     newName: '07---audio-llama-shownotes.md'
   },
   {
+    // Process podcast RSS feed in default order.
     cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"',
     expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md',
     newName: '08---thoughts-on-lambda-school-layoffs-prompt.md'
   },
   {
+    // Process a video using ChatGPT for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md',
     newName: '09---ep0-fsjam-podcast-chatgpt-shownotes.md'
   },
   {
+    // Process video with ChatGPT using GPT_4o_MINI model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --chatgpt GPT_4o_MINI',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-chatgpt-shownotes.md',
     newName: '10---ep0-fsjam-podcast-chatgpt-shownotes.md'
   },
   {
+    // Process a video using Claude for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md',
     newName: '11---ep0-fsjam-podcast-claude-shownotes.md'
   },
   {
+    // Process video with Claude using CLAUDE_3_SONNET model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --claude CLAUDE_3_SONNET',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-claude-shownotes.md',
     newName: '12---ep0-fsjam-podcast-claude-shownotes.md'
   },
   // {
+  //   // Process a video using Gemini for LLM operations.
   //   cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini',
   //   expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md',
   //   newName: '13---ep0-fsjam-podcast-gemini-shownotes.md'
   // },
   // {
+  //   // Process video with Gemini using GEMINI_1_5_FLASH model.
   //   cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --gemini GEMINI_1_5_FLASH',
   //   expectedFile: '2024-09-24-ep0-fsjam-podcast-gemini-shownotes.md',
   //   newName: '14---ep0-fsjam-podcast-gemini-shownotes.md'
   // },
   {
+    // Process a video using Cohere for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md',
     newName: '15---ep0-fsjam-podcast-cohere-shownotes.md'
   },
   {
+    // Process video with Cohere using COMMAND_R_PLUS model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --cohere COMMAND_R_PLUS',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-cohere-shownotes.md',
     newName: '16---ep0-fsjam-podcast-cohere-shownotes.md'
   },
   {
+    // Process a video using Mistral for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md',
     newName: '17---ep0-fsjam-podcast-mistral-shownotes.md'
   },
   {
+    // Process video with Mistral using MIXTRAL_8x7b model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --mistral MIXTRAL_8x7b',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-mistral-shownotes.md',
     newName: '18---ep0-fsjam-podcast-mistral-shownotes.md'
   },
   {
+    // Process a video using OctoAI for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md',
     newName: '19---ep0-fsjam-podcast-octo-shownotes.md'
   },
   {
+    // Process video with OctoAI using LLAMA_3_1_8B model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo LLAMA_3_1_8B',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-octo-shownotes.md',
     newName: '20---ep0-fsjam-podcast-octo-shownotes.md'
   },
   {
+    // Process a video using Llama for local LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md',
     newName: '21---ep0-fsjam-podcast-llama-shownotes.md'
   },
   {
+    // Process a video using Ollama for LLM operations.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md',
     newName: '22---ep0-fsjam-podcast-ollama-shownotes.md'
   },
   {
+    // Process a video using Deepgram for transcription.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md',
     newName: '23---ep0-fsjam-podcast-prompt.md'
   },
   {
+    // Process video using Deepgram and Llama.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --deepgram --llama',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md',
     newName: '24---ep0-fsjam-podcast-llama-shownotes.md'
   },
   {
+    // Process a video using AssemblyAI for transcription.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md',
     newName: '25---ep0-fsjam-podcast-prompt.md'
   },
   {
+    // Process video using AssemblyAI and Llama.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --assembly --llama',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md',
     newName: '26---ep0-fsjam-podcast-llama-shownotes.md'
   },
   {
+    // Process an audio file URL using AssemblyAI with speaker labels.
     cmd: 'npm run as -- --video "https://ajc.pics/audio/fsjam-short.mp3" --assembly --speakerLabels',
     expectedFile: '2024-05-08-fsjam-short-prompt.md',
     newName: '27---fsjam-short-prompt.md'
   },
   {
+    // Process video using Whisper.cpp in Docker with 'tiny' model.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md',
     newName: '28---ep0-fsjam-podcast-prompt.md'
   },
   {
+    // Process a video with all available prompt options (except smallChapters).
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters mediumChapters longChapters takeaways questions',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md',
     newName: '29---ep0-fsjam-podcast-prompt.md'
   },
   {
+    // Process video with multiple prompt sections, Whisper 'tiny' model, and Llama.
     cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama',
     expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md',
     newName: '30---ep0-fsjam-podcast-llama-shownotes.md'