From 0ce9b705c644effbb94474c0fb111e691e5ec612 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 11 Oct 2024 02:55:31 -0500 Subject: [PATCH 01/10] add ollama server and model checks --- src/llms/llama.ts | 2 +- src/llms/ollama.ts | 149 ++++++++++++++++++++++++++++++++++++++++----- src/types.ts | 18 ++++++ 3 files changed, 153 insertions(+), 16 deletions(-) diff --git a/src/llms/llama.ts b/src/llms/llama.ts index c1ea1ac..a849e4d 100644 --- a/src/llms/llama.ts +++ b/src/llms/llama.ts @@ -80,7 +80,7 @@ export const callLlama: LLMFunction = async ( // Write the response to the temporary file await writeFile(tempPath, response) - log(wait(' \nLLM processing completed')) + log(wait('\n LLM processing completed')) } catch (error) { console.error(`Error in callLlama: ${error instanceof Error ? (error as Error).message : String(error)}`) throw error diff --git a/src/llms/ollama.ts b/src/llms/ollama.ts index 56431a0..6722764 100644 --- a/src/llms/ollama.ts +++ b/src/llms/ollama.ts @@ -4,13 +4,14 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' import { OLLAMA_MODELS } from '../models.js' import { log, wait } from '../models.js' +import { spawn } from 'child_process' -import type { LLMFunction, OllamaModelType, OllamaResponse } from '../types.js' +import type { LLMFunction, OllamaModelType, OllamaResponse, OllamaTagsResponse } from '../types.js' /** * Main function to call the Llama model using the Ollama REST API. - * This function checks if the model is available, pulls it if necessary, - * and then proceeds with the chat. + * This function ensures the Ollama server is running, checks if the model is available, + * and then proceeds with the chat using a streaming response. * @param promptAndTranscript - The combined prompt and transcript content. * @param tempPath - The temporary file path to write the LLM output. * @param modelName - The name of the model to use. @@ -21,14 +22,101 @@ export const callOllama: LLMFunction = async (promptAndTranscript: string, tempP try { // Map the model name to the Ollama model identifier const ollamaModelName = OLLAMA_MODELS[modelName as OllamaModelType] || 'llama3.2:1b' + log(wait(` - modelName: ${modelName}\n - ollamaModelName: ${ollamaModelName}`)) // Get host and port from environment variables or use defaults const ollamaHost = env.OLLAMA_HOST || 'localhost' const ollamaPort = env.OLLAMA_PORT || '11434' - log(wait(` - Using Ollama model: ${ollamaModelName} at http://${ollamaHost}:${ollamaPort}`)) - // Call the Ollama chat API - log(wait(` - Sending chat request to Ollama...`)) + // Check if Ollama server is running, start if not + async function checkServer(): Promise { + try { + const serverResponse = await fetch(`http://${ollamaHost}:${ollamaPort}`) + return serverResponse.ok + } catch (error) { + return false + } + } + + if (await checkServer()) { + log(wait(' - Ollama server is already running.')) + } else { + log(wait(' - Ollama server is not running. 
Attempting to start...')) + const ollamaProcess = spawn('ollama', ['serve'], { + detached: true, + stdio: 'ignore' + }) + ollamaProcess.unref() + + // Wait for the server to be ready + let attempts = 0 + while (attempts < 30) { // Increased to 30 attempts, 30 seconds total + if (await checkServer()) { + log(wait(' - Ollama server is now ready.')) + break + } + await new Promise(resolve => setTimeout(resolve, 1000)) + attempts++ + } + if (attempts === 30) { + throw new Error('Ollama server failed to become ready in time.') + } + } + + // Check if the model is available, pull if not + try { + const tagsResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/tags`) + if (!tagsResponse.ok) { + throw new Error(`HTTP error! status: ${tagsResponse.status}`) + } + const tagsData = await tagsResponse.json() as OllamaTagsResponse + const isModelAvailable = tagsData.models.some(model => model.name === ollamaModelName) + if (!isModelAvailable) { + log(wait(`\n Model ${ollamaModelName} is not available, pulling the model...`)) + const pullResponse = await fetch(`http://${ollamaHost}:${ollamaPort}/api/pull`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ name: ollamaModelName }), + }) + if (!pullResponse.ok) { + throw new Error(`Failed to initiate pull for model ${ollamaModelName}`) + } + if (!pullResponse.body) { + throw new Error('Response body is null') + } + const reader = pullResponse.body.getReader() + const decoder = new TextDecoder() + while (true) { + const { done, value } = await reader.read() + if (done) break + const chunk = decoder.decode(value) + const lines = chunk.split('\n') + for (const line of lines) { + if (line.trim() === '') continue + try { + const response = JSON.parse(line) + if (response.status === 'success') { + log(wait(` - Model ${ollamaModelName} has been pulled successfully.`)) + break + } + } catch (parseError) { + console.error(`Error parsing JSON: ${parseError}`) + } + } + } + } else { + log(wait(`\n Model ${ollamaModelName} is already available...`)) + } + } catch (error) { + console.error(`Error checking/pulling model: ${error instanceof Error ? error.message : String(error)}`) + throw error + } + + log(wait(` - Sending chat request to http://${ollamaHost}:${ollamaPort} using ${ollamaModelName} model`)) + + // Call the Ollama chat API with streaming enabled const response = await fetch(`http://${ollamaHost}:${ollamaPort}/api/chat`, { method: 'POST', headers: { @@ -37,7 +125,7 @@ export const callOllama: LLMFunction = async (promptAndTranscript: string, tempP body: JSON.stringify({ model: ollamaModelName, messages: [{ role: 'user', content: promptAndTranscript }], - stream: false, + stream: true, }), }) @@ -45,15 +133,46 @@ export const callOllama: LLMFunction = async (promptAndTranscript: string, tempP throw new Error(`HTTP error! 
status: ${response.status}`) } - // Type assertion to enforce the structure of the response - // const data = await response.json() as any - const data = await response.json() as OllamaResponse + if (!response.body) { + throw new Error('Response body is null') + } + + const reader = response.body.getReader() + const decoder = new TextDecoder() + let fullContent = '' + let isFirstChunk = true + + while (true) { + const { done, value } = await reader.read() + if (done) break + + const chunk = decoder.decode(value) + const lines = chunk.split('\n') + + for (const line of lines) { + if (line.trim() === '') continue + + try { + const parsedResponse = JSON.parse(line) as OllamaResponse + if (parsedResponse.message?.content) { + if (isFirstChunk) { + log(wait(` - Receiving streaming response from Ollama...`)) + isFirstChunk = false + } + fullContent += parsedResponse.message.content + } + + if (parsedResponse.done) { + log(wait(` - Completed receiving response from Ollama.`)) + } + } catch (parseError) { + console.error(`Error parsing JSON: ${parseError}`) + } + } + } - // Extract the assistant's reply and write the response to the output file - const assistantReply = data.message.content - log(wait(` - Received response from Ollama.`)) - await writeFile(tempPath, assistantReply) - log(wait(`\n Transcript saved to temporary file:\n - ${tempPath}`)) + // Write the full content to the output file + await writeFile(tempPath, fullContent) } catch (error) { console.error(`Error in callOllama: ${error instanceof Error ? (error as Error).message : String(error)}`) console.error(`Stack Trace: ${error instanceof Error ? error.stack : 'No stack trace available'}`) diff --git a/src/types.ts b/src/types.ts index 2136e4f..4357970 100644 --- a/src/types.ts +++ b/src/types.ts @@ -343,6 +343,24 @@ export type OllamaResponse = { eval_duration: number } +export type OllamaTagsResponse = { + models: Array<{ + name: string + model: string + modified_at: string + size: number + digest: string + details: { + parent_model: string + format: string + family: string + families: string[] + parameter_size: string + quantization_level: string + } + }> +} + /** * Represents the function signature for cleaning up temporary files. 
*/ From 9c59254bb1e9ac28fc2d3e054b70d42c625936f4 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 11 Oct 2024 02:56:04 -0500 Subject: [PATCH 02/10] check for empty whisper model case --- src/transcription/whisper.ts | 21 +++++++++++++-------- src/transcription/whisperDocker.ts | 11 ++++++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/transcription/whisper.ts b/src/transcription/whisper.ts index 2e25dcf..63d2b8f 100644 --- a/src/transcription/whisper.ts +++ b/src/transcription/whisper.ts @@ -6,7 +6,7 @@ import { promisify } from 'node:util' import { existsSync } from 'node:fs' import { WHISPER_MODELS } from '../models.js' import { log, wait } from '../models.js' -import type { ProcessingOptions } from '../types.js' +import type { ProcessingOptions, WhisperModelType } from '../types.js' const execPromise = promisify(exec) @@ -21,29 +21,34 @@ export async function callWhisper(options: ProcessingOptions, finalPath: string) log(wait('\n Using Whisper for transcription...')) try { // Get the whisper model from options or use 'base' as default - const whisperModel = options.whisper || 'base' - + let whisperModel = 'base' + if (typeof options.whisper === 'string') { + whisperModel = options.whisper + } else if (options.whisper !== true) { + throw new Error('Invalid whisper option') + } + if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) } // Get the model ggml file name - const modelGGMLName = WHISPER_MODELS[whisperModel] + const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] log(wait(`\n - whisperModel: ${whisperModel}\n - modelGGMLName: ${modelGGMLName}`)) // Setup Whisper if (!existsSync('./whisper.cpp')) { - log(`\nNo whisper.cpp repo found, running git clone and make...\n`) + log(`\n No whisper.cpp repo found, running git clone and make...\n`) await execPromise('git clone https://github.com/ggerganov/whisper.cpp.git && make -C whisper.cpp && cp .github/whisper.Dockerfile whisper.cpp/Dockerfile') - log(`\nwhisper.cpp clone and make commands complete.\n`) + log(`\n - whisper.cpp clone and make commands complete.\n`) } // Ensure model is downloaded if (!existsSync(`./whisper.cpp/models/ggml-${whisperModel}.bin`)) { - log(wait(` Model not found, downloading...\n - ${whisperModel}\n`)) + log(wait(`\n Model not found, downloading...\n - ${whisperModel}\n`)) await execPromise(`bash ./whisper.cpp/models/download-ggml-model.sh ${whisperModel}`) - log(wait(' Model download completed, running transcription...\n')) + log(wait(' - Model download completed, running transcription...\n')) } // Run transcription diff --git a/src/transcription/whisperDocker.ts b/src/transcription/whisperDocker.ts index 22b2566..ac11458 100644 --- a/src/transcription/whisperDocker.ts +++ b/src/transcription/whisperDocker.ts @@ -6,7 +6,7 @@ import { promisify } from 'node:util' import { join } from 'node:path' import { WHISPER_MODELS } from '../models.js' import { log, wait } from '../models.js' -import type { ProcessingOptions } from '../types.js' +import type { ProcessingOptions, WhisperModelType } from '../types.js' const execPromise = promisify(exec) @@ -21,14 +21,19 @@ export async function callWhisperDocker(options: ProcessingOptions, finalPath: s log(wait('\n Using Whisper Docker for transcription...')) try { // Get the whisper model from options or use 'base' as default - const whisperModel = options.whisperDocker || 'base' + let whisperModel = 'base' + if (typeof 
options.whisperDocker === 'string') { + whisperModel = options.whisperDocker + } else if (options.whisperDocker !== true) { + throw new Error('Invalid whisperDocker option') + } if (!(whisperModel in WHISPER_MODELS)) { throw new Error(`Unknown model type: ${whisperModel}`) } // Get the model ggml file name - const modelGGMLName = WHISPER_MODELS[whisperModel] + const modelGGMLName = WHISPER_MODELS[whisperModel as WhisperModelType] const CONTAINER_NAME = 'autoshow-whisper-1' const modelPathContainer = `/app/models/${modelGGMLName}` From af5d34ddc1a88ee0eca3825e5cb33a4658794bd2 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Fri, 11 Oct 2024 03:05:14 -0500 Subject: [PATCH 03/10] update setup script, local tests, and deps --- package.json | 44 ++++++++++++------------ scripts/setup.sh | 85 +++++++++++++++++++++++++++++++--------------- test/local.test.js | 51 ++++++++++------------------ 3 files changed, 97 insertions(+), 83 deletions(-) diff --git a/package.json b/package.json index f3abd1e..8d4a65e 100644 --- a/package.json +++ b/package.json @@ -40,29 +40,29 @@ "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts" }, "dependencies": { - "@anthropic-ai/sdk": "^0.26.0", - "@deepgram/sdk": "^3.5.1", - "@fastify/cors": "^10.0.1", - "@google/generative-ai": "^0.17.1", - "@mistralai/mistralai": "^1.0.2", - "@octoai/sdk": "^1.5.1", - "assemblyai": "^4.6.1", - "chalk": "^5.3.0", - "cohere-ai": "^7.12.0", - "commander": "^12.1.0", - "fast-xml-parser": "^4.4.1", - "fastify": "^5.0.0", - "ffmpeg-static": "^5.2.0", - "file-type": "^19.4.1", - "inquirer": "^10.2.2", - "node-llama-cpp": "^3.1.0", - "ollama": "^0.5.9", - "openai": "^4.55.7" + "@anthropic-ai/sdk": "0.29.0", + "@deepgram/sdk": "3.8.1", + "@fastify/cors": "10.0.1", + "@google/generative-ai": "0.21.0", + "@mistralai/mistralai": "1.1.0", + "@octoai/sdk": "1.11.0", + "assemblyai": "4.7.1", + "chalk": "5.3.0", + "cohere-ai": "7.14.0", + "commander": "12.1.0", + "fast-xml-parser": "4.5.0", + "fastify": "5.0.0", + "ffmpeg-static": "5.2.0", + "file-type": "19.5.0", + "inquirer": "12.0.0", + "node-llama-cpp": "3.1.1", + "ollama": "0.5.9", + "openai": "4.67.3" }, "devDependencies": { - "@types/inquirer": "^9.0.7", - "@types/node": "^22.7.5", - "tsx": "^4.19.1", - "typescript": "^5.6.2" + "@types/inquirer": "9.0.7", + "@types/node": "22.7.5", + "tsx": "4.19.1", + "typescript": "5.6.3" } } diff --git a/scripts/setup.sh b/scripts/setup.sh index 55022b4..3a4a1df 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -1,4 +1,5 @@ #!/bin/bash +# scripts/setup.sh # Function to check if a command exists command_exists() { @@ -21,45 +22,73 @@ else echo "yt-dlp is already installed." fi +# Function to check if Ollama server is running +check_ollama_server() { + if curl -s "http://127.0.0.1:11434" &> /dev/null; then + echo "Ollama server is already running." + else + echo "Ollama server is not running. Starting Ollama server..." + ollama serve > ollama.log 2>&1 & + OLLAMA_PID=$! + echo "Ollama server started with PID $OLLAMA_PID" + sleep 5 + fi +} + +# Function to check if a model is available, and pull it if not +check_and_pull_model() { + local model=$1 + if ollama list | grep -q "$model"; then + echo "Model $model is already available." + else + echo "Model $model is not available. Pulling the model..." + ollama pull "$model" + fi +} + # Check if Ollama is installed if ! 
command_exists ollama; then echo "Ollama is not installed, refer to installation instructions here:" echo "https://github.com/ollama/ollama" else echo "Ollama is installed." -fi - -# Check if Ollama server is running -if ! curl -s "http://127.0.0.1:11434" &> /dev/null; then - echo "Ollama server is not running. Starting Ollama server..." - ollama serve > ollama.log 2>&1 & - OLLAMA_PID=$! - echo "Ollama server started with PID $OLLAMA_PID" - sleep 5 -else - echo "Ollama server is already running." + + # Check if Ollama server is running + check_ollama_server + + # Check and pull required models + check_and_pull_model "llama3.2:1b" fi # Install npm dependencies npm i -# Clone whisper.cpp repository -git clone https://github.com/ggerganov/whisper.cpp.git - -# Download whisper models -bash ./whisper.cpp/models/download-ggml-model.sh base -bash ./whisper.cpp/models/download-ggml-model.sh large-v2 - -# Compile whisper.cpp -make -C whisper.cpp - -# Copy Dockerfile -cp .github/whisper.Dockerfile whisper.cpp/Dockerfile - -# Download Qwen 2.5 1.5B model for Llama.cpp -curl -L "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q6_k.gguf" -o "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf" +# Check if whisper.cpp directory exists +if [ -d "whisper.cpp" ]; then + echo "whisper.cpp directory already exists. Skipping clone and setup." +else + echo "Cloning whisper.cpp repository..." + git clone https://github.com/ggerganov/whisper.cpp.git + + # Download whisper models + echo "Downloading whisper models..." + bash ./whisper.cpp/models/download-ggml-model.sh base + + # Compile whisper.cpp + echo "Compiling whisper.cpp..." + make -C whisper.cpp + + # Copy Dockerfile + echo "Copying Dockerfile..." + cp .github/whisper.Dockerfile whisper.cpp/Dockerfile +fi -# Pull Llama 3.1 1B model using Ollama -ollama pull llama3.2:1b +# Check if Qwen 2.5 1.5B model exists +if [ -f "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf" ]; then + echo "Qwen 2.5 1.5B model already exists. Skipping download." +else + echo "Downloading Qwen 2.5 1.5B model..." + curl -L "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q6_k.gguf" -o "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf" +fi echo "Setup completed successfully!" 
\ No newline at end of file diff --git a/test/local.test.js b/test/local.test.js index 2d7a520..bc99858 100644 --- a/test/local.test.js +++ b/test/local.test.js @@ -37,68 +37,53 @@ const commands = [ newName: 'FILE_05.md' }, { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --llama', - expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: 'FILE_06.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --ollama LLAMA_3_2_3B', expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', - newName: 'FILE_07.md' + newName: 'FILE_06.md' }, { cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisper tiny', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_08.md' + newName: 'FILE_07.md' }, + // { + // cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', + // expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', + // newName: 'FILE_08.md' + // }, { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --whisperDocker tiny', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary mediumChapters takeaways questions', expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', newName: 'FILE_09.md' }, { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles', - expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', + cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --ollama', + expectedFile: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_10.md' }, { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary mediumChapters takeaways questions', - expectedFile: '2024-09-24-ep0-fsjam-podcast-prompt.md', - newName: 'FILE_11.md' - }, - { - cmd: 'npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --prompt titles summary shortChapters takeaways questions --whisper tiny --llama', - expectedFile: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', - newName: 'FILE_12.md' - }, - { - cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --llama', + cmd: 'npm run as -- --playlist "https://www.youtube.com/playlist?list=PLCVnrVv4KhXPz0SoAVu8Rc1emAdGPbSbr" --prompt titles --whisper tiny --ollama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-llama-shownotes.md', newName: 'FILE_13A.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-llama-shownotes.md', newName: 'FILE_13B.md' } + { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_11A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_11B.md' } ] }, { cmd: 'npm run as -- --urls "content/example-urls.md" --prompt titles --whisper tiny --ollama', expectedFiles: [ - { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_14A.md' }, - { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_14B.md' } + { file: '2024-09-24-ep1-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_12A.md' }, + { file: '2024-09-24-ep0-fsjam-podcast-ollama-shownotes.md', newName: 'FILE_12B.md' } ] }, { cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed"', 
expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-prompt.md', - newName: 'FILE_15.md' - }, - { - cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --item "https://api.substack.com/feed/podcast/36236609/fd1f1532d9842fe1178de1c920442541.mp3" --whisper tiny --llama --prompt titles summary longChapters takeaways questions', - expectedFile: '2021-05-10-thoughts-on-lambda-school-layoffs-llama-shownotes.md', - newName: 'FILE_16.md', + newName: 'FILE_13.md' }, { cmd: 'npm run as -- --rss "https://ajcwebdev.substack.com/feed" --info', expectedFile: 'rss_info.json', - newName: 'FILE_17_rss_info.json', + newName: 'FILE_14_rss_info.json', } ] From dacb7c07fe5dca1bce1b13a982852654b0a90406 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sun, 13 Oct 2024 02:00:11 -0500 Subject: [PATCH 04/10] clean content directory script --- package.json | 1 + scripts/cleanContent.ts | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 scripts/cleanContent.ts diff --git a/package.json b/package.json index 8d4a65e..821bb2a 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "t": "npm run test-local", "test-local": "node --test test/local.test.js", "test-all": "node --test test/all.test.js", + "clean": "tsx scripts/cleanContent.ts", "bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts", "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts" }, diff --git a/scripts/cleanContent.ts b/scripts/cleanContent.ts new file mode 100644 index 0000000..7f3ef32 --- /dev/null +++ b/scripts/cleanContent.ts @@ -0,0 +1,26 @@ +// scripts/cleanContent.ts + +import { exec } from 'child_process' +import { promisify } from 'util' + +const execAsync = promisify(exec) + +async function cleanContent() { + try { + const { stdout, stderr } = await execAsync( + 'find content -type f -not \\( -name ".gitkeep" -o -name "audio.mp3" -o -name "example-urls.md" \\) -delete' + ) + if (stderr) { + console.error('Error:', stderr) + return + } + console.log('Files deleted successfully') + if (stdout) { + console.log('Output:', stdout) + } + } catch (error) { + console.error('Execution error:', error) + } +} + +cleanContent() \ No newline at end of file From e4c6fe5be3bd35cf1cdfc3f50e9b019e1985b4d5 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Sun, 13 Oct 2024 04:10:05 -0500 Subject: [PATCH 05/10] add whisper python version for turbo --- src/autoshow.ts | 1 + src/models.ts | 24 +++++++- src/transcription/whisperPython.ts | 90 ++++++++++++++++++++++++++++++ src/types.ts | 6 +- src/utils/runTranscription.ts | 87 ++++++++++------------------- 5 files changed, 145 insertions(+), 63 deletions(-) create mode 100644 src/transcription/whisperPython.ts diff --git a/src/autoshow.ts b/src/autoshow.ts index d374438..427c7bd 100644 --- a/src/autoshow.ts +++ b/src/autoshow.ts @@ -45,6 +45,7 @@ program .option('--info', 'Generate JSON file with RSS feed information instead of processing items') .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification') .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification') + .option('--whisperPython [model]', 'Use openai-whisper for transcription with optional model specification') .option('--deepgram', 'Use Deepgram for transcription') .option('--assembly', 'Use AssemblyAI for transcription') .option('--speakerLabels', 
'Use speaker labels for AssemblyAI transcription') diff --git a/src/models.ts b/src/models.ts index 76b7492..5ea7929 100644 --- a/src/models.ts +++ b/src/models.ts @@ -15,13 +15,13 @@ export const log: typeof console.log = console.log export const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] export const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] -export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'deepgram', 'assembly'] +export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'whisperPython', 'deepgram', 'assembly'] /** - * Define available Whisper models + * Define available Whisper models for whisper.cpp * @type {Record} */ -export const WHISPER_MODELS: Record = { +export const WHISPER_MODELS: Record = { 'tiny': 'ggml-tiny.bin', 'tiny.en': 'ggml-tiny.en.bin', 'base': 'ggml-base.bin', @@ -34,6 +34,24 @@ export const WHISPER_MODELS: Record = { 'large-v2': 'ggml-large-v2.bin', } +/** + * Define available Whisper models for openai-whisper + * @type {Record} + */ +export const WHISPER_PYTHON_MODELS: Record = { + tiny: 'tiny', + 'tiny.en': 'tiny.en', + base: 'base', + 'base.en': 'base.en', + small: 'small', + 'small.en': 'small.en', + medium: 'medium', + 'medium.en': 'medium.en', + 'large-v1': 'large-v1', + 'large-v2': 'large-v2', + turbo: 'turbo', +} + /** * Map of ChatGPT model identifiers to their API names * @type {Record} diff --git a/src/transcription/whisperPython.ts b/src/transcription/whisperPython.ts new file mode 100644 index 0000000..8667f1e --- /dev/null +++ b/src/transcription/whisperPython.ts @@ -0,0 +1,90 @@ +// src/transcription/whisperPython.ts + +import { readFile, writeFile, unlink } from 'node:fs/promises' +import { exec } from 'node:child_process' +import { promisify } from 'node:util' +import { log, wait } from '../models.js' +import type { ProcessingOptions } from '../types.js' +import { WHISPER_PYTHON_MODELS } from '../models.js' + +const execPromise = promisify(exec) + +/** + * Main function to handle transcription using openai-whisper Python library. + * @param {ProcessingOptions} options - Additional processing options. + * @param {string} finalPath - The base path for the files. + * @returns {Promise} - Returns the formatted transcript content. + * @throws {Error} - If an error occurs during transcription. 
+ */ +export async function callWhisperPython(options: ProcessingOptions, finalPath: string): Promise { + log(wait('\n Using openai-whisper Python library for transcription...')) + + try { + // Get the whisper model from options or use 'base' as default + let whisperModel: string = 'base' + if (typeof options.whisperPython === 'string') { + whisperModel = options.whisperPython + } else if (options.whisperPython !== true) { + throw new Error('Invalid whisperPython option') + } + + if (!(whisperModel in WHISPER_PYTHON_MODELS)) { + throw new Error(`Unknown model type: ${whisperModel}`) + } + + log(wait(`\n - whisperModel: ${whisperModel}`)) + + // Check if ffmpeg is installed + try { + await execPromise('ffmpeg -version') + } catch (error) { + throw new Error('ffmpeg is not installed or not available in PATH') + } + + // Check if Python is installed + try { + await execPromise('python3 --version') + } catch (error) { + throw new Error('Python is not installed or not available in PATH') + } + + // Check if the openai-whisper package is installed + try { + // await execPromise('python3 -c "import whisper"') + await execPromise('which whisper') + } catch (error) { + log(wait('\n openai-whisper not found, installing...')) + // await execPromise('pip install -U openai-whisper') + await execPromise('brew install openai-whisper') + log(wait(' - openai-whisper installed')) + } + + // Prepare the command to run the transcription + const command = `whisper "${finalPath}.wav" --model ${whisperModel} --output_dir "content" --output_format vtt --language en --word_timestamps True` + + log(wait(`\n Running transcription with command:\n ${command}\n`)) + + // Execute the command + await execPromise(command) + + // Read the generated transcript file + const transcriptContent = await readFile(`${finalPath}.vtt`, 'utf8') + + // Write the transcript to the expected output file + await writeFile(`${finalPath}.txt`, transcriptContent) + + // Create an empty LRC file to prevent cleanup errors and unlink VTT file + await writeFile(`${finalPath}.lrc`, '') + log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`)) + await unlink(`${finalPath}.vtt`) + log(wait(`\n VTT file deleted:\n - ${finalPath}.vtt\n`)) + + log(wait(`\n Transcript successfully completed:\n - ${finalPath}.txt\n`)) + + return transcriptContent + + } catch (error) { + console.error('Error in callWhisperPython:', (error as Error).message) + process.exit(1) + } +} diff --git a/src/types.ts b/src/types.ts index 4357970..27d79af 100644 --- a/src/types.ts +++ b/src/types.ts @@ -26,6 +26,8 @@ export type ProcessingOptions = { noCleanUp?: boolean /** The Whisper model to use (e.g., 'tiny', 'base'). */ whisper?: WhisperModelType + /** The Whisper Python model to use (e.g., 'tiny', 'base'). */ + whisperPython?: WhisperModelType /** The Whisper model to use with Docker (e.g., 'tiny', 'base'). */ whisperDocker?: WhisperModelType /** Flag to use Deepgram for transcription. */ @@ -231,7 +233,7 @@ export type SupportedFileType = 'wav' | 'mp3' | 'm4a' | 'aac' | 'ogg' | 'flac' | * - deepgram: Use Deepgram's transcription service. * - assembly: Use AssemblyAI's transcription service. */ -export type TranscriptServices = 'whisper' | 'whisperDocker' | 'deepgram' | 'assembly' +export type TranscriptServices = 'whisper' | 'whisperDocker' | 'whisperPython' | 'deepgram' | 'assembly' /** * Represents the available Whisper model types. 
@@ -247,7 +249,7 @@ export type TranscriptServices = 'whisper' | 'whisperDocker' | 'deepgram' | 'ass * - large-v1: Large multilingual model version 1. * - large-v2: Large multilingual model version 2. */ -export type WhisperModelType = 'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large-v1' | 'large-v2' +export type WhisperModelType = 'tiny' | 'tiny.en' | 'base' | 'base.en' | 'small' | 'small.en' | 'medium' | 'medium.en' | 'large-v1' | 'large-v2' | 'turbo' /** * Represents the object containing the different prompts, their instructions to the LLM, and their expected example output. diff --git a/src/utils/runTranscription.ts b/src/utils/runTranscription.ts index dfde1cc..a5cf2dc 100644 --- a/src/utils/runTranscription.ts +++ b/src/utils/runTranscription.ts @@ -1,75 +1,46 @@ // src/utils/runTranscription.ts -import { readFile, writeFile } from 'node:fs/promises' import { callWhisper } from '../transcription/whisper.js' +import { callWhisperPython } from '../transcription/whisperPython.js' import { callWhisperDocker } from '../transcription/whisperDocker.js' import { callDeepgram } from '../transcription/deepgram.js' import { callAssembly } from '../transcription/assembly.js' -import { log, step, success, wait } from '../models.js' -import type { TranscriptServices, ProcessingOptions } from '../types.js' +import { log, step } from '../models.js' +import { TranscriptServices, ProcessingOptions } from '../types.js' /** - * Main function to run transcription. + * Manages the transcription process based on the selected service. + * @param {ProcessingOptions} options - The processing options. + * * @param {TranscriptServices} transcriptServices - The transcription service to use. * @param {string} finalPath - The base path for the files. - * @param {string} frontMatter - Optional front matter content for the markdown file. - * @param {TranscriptServices} transcriptServices - The transcription service to use. - * @param {ProcessingOptions} [options] - Additional processing options. - * @returns {Promise} - Returns the final content including markdown and transcript. - * @throws {Error} - If the transcription service fails or an error occurs during processing. 
+ * @returns {Promise} */ export async function runTranscription( options: ProcessingOptions, finalPath: string, frontMatter: string, transcriptServices?: TranscriptServices -): Promise { - log(step(`\nStep 3 - Running transcription on audio file...`)) - try { - let txtContent: string - - // Choose the transcription service based on the provided option - switch (transcriptServices) { - case 'deepgram': - txtContent = await callDeepgram(options, finalPath) - break - - case 'assembly': - txtContent = await callAssembly(options, finalPath) - break - - case 'whisperDocker': - txtContent = await callWhisperDocker(options, finalPath) - break - - case 'whisper': - default: - txtContent = await callWhisper(options, finalPath) - break - } - - let mdContent = frontMatter - try { - // Attempt to read existing markdown content - const existingContent = await readFile(`${finalPath}.md`, 'utf8') - mdContent += existingContent - } catch (error) { - if ((error as NodeJS.ErrnoException).code !== 'ENOENT') { - console.error(`Error reading markdown file: ${(error as Error).message}`) - throw error - } - // If the file does not exist, proceed without appending - } - - // Combine existing markdown content with the transcript - const finalContent = `${mdContent}\n## Transcript\n\n${txtContent}` - - // Write final markdown file, including existing content and the new transcript - await writeFile(`${finalPath}.md`, finalContent) - log(success(` Markdown file successfully updated with transcript:\n - ${finalPath}.md`)) - - return finalContent - } catch (error) { - console.error(`Error in transcription process: ${(error as Error).message}`) - throw error +): Promise { + log(step(`\nStep 3 - Running transcription on audio file using ${transcriptServices}...`)) + + // Choose the transcription service based on the provided option + switch (transcriptServices) { + case 'deepgram': + await callDeepgram(options, finalPath) + break + case 'assembly': + await callAssembly(options, finalPath) + break + case 'whisper': + await callWhisper(options, finalPath) + break + case 'whisperDocker': + await callWhisperDocker(options, finalPath) + break + case 'whisperPython': + await callWhisperPython(options, finalPath) + break + default: + throw new Error(`Unknown transcription service: ${transcriptServices}`) } } \ No newline at end of file From 1c61f0d64fdfa51a481c0c2f28e9137fe716753d Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Mon, 14 Oct 2024 21:58:47 -0500 Subject: [PATCH 06/10] add fireworks and together options --- docs/examples.md | 12 ++++++ src/autoshow.ts | 2 + src/llms/fireworks.ts | 88 +++++++++++++++++++++++++++++++++++++++++ src/llms/together.ts | 91 +++++++++++++++++++++++++++++++++++++++++++ src/models.ts | 2 +- src/types.ts | 6 ++- src/utils/runLLM.ts | 4 ++ 7 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 src/llms/fireworks.ts create mode 100644 src/llms/together.ts diff --git a/docs/examples.md b/docs/examples.md index cac96c9..fcb76e3 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -245,6 +245,18 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo NOUS_ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZARD_2_8X_22B ``` +### Fireworks + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks +``` + +### Together + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together +``` + ### Llama.cpp ```bash diff --git 
a/src/autoshow.ts b/src/autoshow.ts index 427c7bd..53be20e 100644 --- a/src/autoshow.ts +++ b/src/autoshow.ts @@ -54,6 +54,8 @@ program .option('--cohere [model]', 'Use Cohere for processing with optional model specification') .option('--mistral [model]', 'Use Mistral for processing') .option('--octo [model]', 'Use Octo for processing') + .option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification') + .option('--together [model]', 'Use Together AI for processing with optional model specification') .option('--llama [model]', 'Use Node Llama for processing with optional model specification') .option('--ollama [model]', 'Use Ollama for processing with optional model specification') .option('--gemini [model]', 'Use Gemini for processing with optional model specification') diff --git a/src/llms/fireworks.ts b/src/llms/fireworks.ts new file mode 100644 index 0000000..c7c2ed4 --- /dev/null +++ b/src/llms/fireworks.ts @@ -0,0 +1,88 @@ +// src/llms/fireworks.ts + +import { writeFile } from 'node:fs/promises' +import { env } from 'node:process' +import fetch from 'node-fetch' +import { log, wait } from '../models.js' +import type { LLMFunction } from '../types.js' + +/** + * Main function to call Fireworks AI API. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Fireworks model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} - If an error occurs during the API call. + */ +export const callFireworks: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'accounts/fireworks/models/llama-v3p2-3b-instruct' +): Promise => { + // Check if the FIREWORKS_API_KEY environment variable is set + if (!env.FIREWORKS_API_KEY) { + throw new Error('FIREWORKS_API_KEY environment variable is not set. 
Please set it to your Fireworks API key.') + } + + try { + log(wait(`\n Using Fireworks model:\n - ${model}`)) + + // Prepare the request body + const requestBody = { + model: "accounts/fireworks/models/llama-v3p2-3b-instruct", + messages: [ + { + role: 'user', + content: promptAndTranscript, + }, + ], + } + + // Make API call to Fireworks AI + const response = await fetch('https://api.fireworks.ai/inference/v1/chat/completions', { + method: 'POST', + headers: { + Authorization: `Bearer ${env.FIREWORKS_API_KEY}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + }) + + // Check if the response is OK + if (!response.ok) { + const errorText = await response.text() + throw new Error(`Fireworks API error: ${response.status} ${response.statusText} - ${errorText}`) + } + + const data = await response.json() + + // Extract the generated content + const content = data.choices[0]?.message?.content + const finishReason = data.choices[0]?.finish_reason + const usedModel = data.model + const usage = data.usage + + if (!content) { + throw new Error('No content generated from the Fireworks API') + } + + // Write the generated content to the specified output file + await writeFile(tempPath, content) + log(wait(`\n Fireworks response saved to ${tempPath}`)) + + // Log finish reason, used model, and token usage + log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)) + if (usage) { + const { prompt_tokens, completion_tokens, total_tokens } = usage + log( + wait( + ` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens` + ) + ) + } + } catch (error) { + // Log any errors that occur during the process + console.error(`Error in callFireworks: ${(error as Error).message}`) + throw error // Re-throw the error for handling by the caller + } +} \ No newline at end of file diff --git a/src/llms/together.ts b/src/llms/together.ts new file mode 100644 index 0000000..d7e4142 --- /dev/null +++ b/src/llms/together.ts @@ -0,0 +1,91 @@ +// src/llms/together.ts + +import { writeFile } from 'node:fs/promises' +import { env } from 'node:process' +import fetch from 'node-fetch' +import { log, wait } from '../models.js' +import type { LLMFunction } from '../types.js' + +/** + * Main function to call Together AI API. + * @param promptAndTranscript - The combined prompt and transcript text to process. + * @param tempPath - The temporary file path to write the LLM output. + * @param model - The Together AI model to use. + * @returns A Promise that resolves when the API call is complete. + * @throws {Error} - If an error occurs during the API call. + */ +export const callTogether: LLMFunction = async ( + promptAndTranscript: string, + tempPath: string, + model: string = 'meta-llama/Llama-3.2-3B-Instruct-Turbo' +): Promise => { + // Check if the TOGETHER_API_KEY environment variable is set + if (!env.TOGETHER_API_KEY) { + throw new Error('TOGETHER_API_KEY environment variable is not set. 
Please set it to your Together AI API key.') + } + + try { + log(wait(`\n Using Together AI model:\n - ${model}`)) + + // Prepare the request body + const requestBody = { + model: 'meta-llama/Llama-3.2-3B-Instruct-Turbo', + messages: [ + { + role: 'user', + content: promptAndTranscript, + }, + ], + max_tokens: 2000, + temperature: 0.7, + } + + // Make API call to Together AI + const response = await fetch('https://api.together.xyz/v1/chat/completions', { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + authorization: `Bearer ${env.TOGETHER_API_KEY}`, + }, + body: JSON.stringify(requestBody), + }) + + // Check if the response is OK + if (!response.ok) { + const errorText = await response.text() + throw new Error(`Together AI API error: ${response.status} ${response.statusText} - ${errorText}`) + } + + const data = await response.json() + + // Extract the generated content + const content = data.choices[0]?.message?.content + const finishReason = data.choices[0]?.finish_reason + const usedModel = data.model + const usage = data.usage + + if (!content) { + throw new Error('No content generated from the Together AI API') + } + + // Write the generated content to the specified output file + await writeFile(tempPath, content) + log(wait(`\n Together AI response saved to ${tempPath}`)) + + // Log finish reason, used model, and token usage + log(wait(`\n Finish Reason: ${finishReason}\n Model Used: ${usedModel}`)) + if (usage) { + const { prompt_tokens, completion_tokens, total_tokens } = usage + log( + wait( + ` Token Usage:\n - ${prompt_tokens} prompt tokens\n - ${completion_tokens} completion tokens\n - ${total_tokens} total tokens` + ) + ) + } + } catch (error) { + // Log any errors that occur during the process + console.error(`Error in callTogether: ${(error as Error).message}`) + throw error // Re-throw the error for handling by the caller + } +} \ No newline at end of file diff --git a/src/models.ts b/src/models.ts index 5ea7929..1684fce 100644 --- a/src/models.ts +++ b/src/models.ts @@ -14,7 +14,7 @@ export const final: ChalkInstance = chalk.bold.italic export const log: typeof console.log = console.log export const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss'] -export const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini'] +export const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini', 'fireworks', 'together'] export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'whisperPython', 'deepgram', 'assembly'] /** diff --git a/src/types.ts b/src/types.ts index 27d79af..2c68cd8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -46,6 +46,10 @@ export type ProcessingOptions = { mistral?: string /** OctoAI model to use (e.g., 'LLAMA_3_1_8B'). */ octo?: string + /** Fireworks model to use (e.g., ''). */ + fireworks?: string + /** Together model to use (e.g., ''). */ + together?: string /** Ollama model to use for local inference (e.g., 'LLAMA_3_2_1B'). */ ollama?: string /** Llama model to use for local inference (e.g., 'LLAMA_3_1_8B'). */ @@ -273,7 +277,7 @@ export type PromptSection = { * - ollama: Use Ollama for processing. * - gemini: Use Google's Gemini models. 
*/ -export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini' +export type LLMServices = 'chatgpt' | 'claude' | 'cohere' | 'mistral' | 'octo' | 'llama' | 'ollama' | 'gemini' | 'fireworks' | 'together' /** * Represents the options for LLM processing. diff --git a/src/utils/runLLM.ts b/src/utils/runLLM.ts index 419b5a0..1514142 100644 --- a/src/utils/runLLM.ts +++ b/src/utils/runLLM.ts @@ -9,6 +9,8 @@ import { callGemini } from '../llms/gemini.js' import { callCohere } from '../llms/cohere.js' import { callMistral } from '../llms/mistral.js' import { callOcto } from '../llms/octo.js' +import { callFireworks } from '../llms/fireworks.js' +import { callTogether } from '../llms/together.js' import { generatePrompt } from '../llms/prompt.js' import { log, step, success, wait } from '../models.js' import type { LLMServices, ProcessingOptions, LLMFunction, LLMFunctions } from '../types.js' @@ -38,6 +40,8 @@ export async function runLLM( cohere: callCohere, mistral: callMistral, octo: callOcto, + fireworks: callFireworks, + together: callTogether, } try { From 8e0bc4a820e4944a7425ad3f0c91882007485d80 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Mon, 14 Oct 2024 21:59:07 -0500 Subject: [PATCH 07/10] fix package scripts --- package.json | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/package.json b/package.json index 821bb2a..c1be6ee 100644 --- a/package.json +++ b/package.json @@ -24,18 +24,18 @@ "docker": "docker compose run --remove-orphans autoshow", "docker-up": "docker compose up --build -d --remove-orphans --no-start", "ds": "docker compose images && docker compose ls", - "v": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --video", - "u": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --urls", - "p": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --playlist", - "f": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --file", - "r": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --rss", - "last3": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --last 3 --rss", - "serve": "node --env-file=.env --no-warnings --watch packages/server/index.ts", - "fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.ts", - "fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.ts", + "v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --video", + "u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --urls", + "p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --playlist", + "f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --file", + "r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --rss", + "last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --last 3 --rss", + "serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts", + "fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts", + "fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts", "t": "npm run test-local", - "test-local": "node --test test/local.test.js", - "test-all": "node --test test/all.test.js", + "test-local": "tsx --test test/local.test.js", + "test-all": "tsx --test test/all.test.js", 
"clean": "tsx scripts/cleanContent.ts", "bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts", "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts" From d44a5450a2e3121d2e21823a71ab0b12ccb816d5 Mon Sep 17 00:00:00 2001 From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com> Date: Mon, 14 Oct 2024 23:55:45 -0500 Subject: [PATCH 08/10] select fireworks and together model --- docs/examples.md | 24 ++++++++ src/llms/fireworks.ts | 13 ++--- src/llms/together.ts | 17 +++--- src/models.ts | 30 +++++++++- src/transcription/deepgram.ts | 36 +----------- src/types.ts | 104 ++++++++++++++++++++++++++++++++++ 6 files changed, 172 insertions(+), 52 deletions(-) diff --git a/docs/examples.md b/docs/examples.md index fcb76e3..64fec6d 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -251,12 +251,36 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZAR npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks ``` +Select Fireworks model: + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_405B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_70B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_8B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_2_3B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_2_1B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks QWEN_2_5_72B +``` + ### Together ```bash npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together ``` +Select Together model: + +```bash +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_2_3B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_405B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_70B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_8B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together GEMMA_2_27B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together GEMMA_2_9B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together QWEN_2_5_72B +npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together QWEN_2_5_7B +``` + ### Llama.cpp ```bash diff --git a/src/llms/fireworks.ts b/src/llms/fireworks.ts index c7c2ed4..060d5df 100644 --- a/src/llms/fireworks.ts +++ b/src/llms/fireworks.ts @@ -2,9 +2,8 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import fetch from 'node-fetch' -import { log, wait } from '../models.js' -import type { LLMFunction } from '../types.js' +import { log, wait, FIREWORKS_MODELS } from '../models.js' +import type { LLMFunction, FireworksModelType, FireworksResponse } from '../types.js' /** * Main function to call Fireworks AI API. 
@@ -17,7 +16,7 @@ import type { LLMFunction } from '../types.js' export const callFireworks: LLMFunction = async ( promptAndTranscript: string, tempPath: string, - model: string = 'accounts/fireworks/models/llama-v3p2-3b-instruct' + model: string = 'LLAMA_3_2_3B' ): Promise => { // Check if the FIREWORKS_API_KEY environment variable is set if (!env.FIREWORKS_API_KEY) { @@ -25,11 +24,11 @@ export const callFireworks: LLMFunction = async ( } try { - log(wait(`\n Using Fireworks model:\n - ${model}`)) + const actualModel = FIREWORKS_MODELS[model as FireworksModelType] || FIREWORKS_MODELS.LLAMA_3_2_3B // Prepare the request body const requestBody = { - model: "accounts/fireworks/models/llama-v3p2-3b-instruct", + model: actualModel, messages: [ { role: 'user', @@ -54,7 +53,7 @@ export const callFireworks: LLMFunction = async ( throw new Error(`Fireworks API error: ${response.status} ${response.statusText} - ${errorText}`) } - const data = await response.json() + const data = await response.json() as FireworksResponse // Extract the generated content const content = data.choices[0]?.message?.content diff --git a/src/llms/together.ts b/src/llms/together.ts index d7e4142..dff3207 100644 --- a/src/llms/together.ts +++ b/src/llms/together.ts @@ -2,9 +2,8 @@ import { writeFile } from 'node:fs/promises' import { env } from 'node:process' -import fetch from 'node-fetch' -import { log, wait } from '../models.js' -import type { LLMFunction } from '../types.js' +import { log, wait, TOGETHER_MODELS } from '../models.js' +import type { LLMFunction, TogetherModelType, TogetherResponse } from '../types.js' /** * Main function to call Together AI API. @@ -17,7 +16,7 @@ import type { LLMFunction } from '../types.js' export const callTogether: LLMFunction = async ( promptAndTranscript: string, tempPath: string, - model: string = 'meta-llama/Llama-3.2-3B-Instruct-Turbo' + model: string = 'LLAMA_3_2_3B' ): Promise => { // Check if the TOGETHER_API_KEY environment variable is set if (!env.TOGETHER_API_KEY) { @@ -25,19 +24,19 @@ export const callTogether: LLMFunction = async ( } try { - log(wait(`\n Using Together AI model:\n - ${model}`)) + const actualModel = TOGETHER_MODELS[model as TogetherModelType] || TOGETHER_MODELS.LLAMA_3_2_3B // Prepare the request body const requestBody = { - model: 'meta-llama/Llama-3.2-3B-Instruct-Turbo', + model: actualModel, messages: [ { role: 'user', content: promptAndTranscript, }, ], - max_tokens: 2000, - temperature: 0.7, + // max_tokens: 2000, + // temperature: 0.7, } // Make API call to Together AI @@ -57,7 +56,7 @@ export const callTogether: LLMFunction = async ( throw new Error(`Together AI API error: ${response.status} ${response.statusText} - ${errorText}`) } - const data = await response.json() + const data = await response.json() as TogetherResponse // Extract the generated content const content = data.choices[0]?.message?.content diff --git a/src/models.ts b/src/models.ts index 1684fce..cc452ee 100644 --- a/src/models.ts +++ b/src/models.ts @@ -2,7 +2,7 @@ import chalk from 'chalk' import type { ChalkInstance } from 'chalk' -import type { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType } from './types.js' +import type { WhisperModelType, ChatGPTModelType, ClaudeModelType, CohereModelType, GeminiModelType, MistralModelType, OctoModelType, LlamaModelType, OllamaModelType, TogetherModelType, FireworksModelType } from './types.js' export const step: ChalkInstance = 
chalk.bold.underline export const dim: ChalkInstance = chalk.dim @@ -118,6 +118,34 @@ export const OCTO_MODELS: Record = { WIZARD_2_8X_22B: "wizardlm-2-8x22b", } +/** + * Map of Fireworks model identifiers to their API names + * @type {Record} + */ +export const FIREWORKS_MODELS: Record = { + LLAMA_3_1_405B: "accounts/fireworks/models/llama-v3p1-405b-instruct", + LLAMA_3_1_70B: "accounts/fireworks/models/llama-v3p1-70b-instruct", + LLAMA_3_1_8B: "accounts/fireworks/models/llama-v3p1-8b-instruct", + LLAMA_3_2_3B: "accounts/fireworks/models/llama-v3p2-3b-instruct", + LLAMA_3_2_1B: "accounts/fireworks/models/llama-v3p2-1b-instruct", + QWEN_2_5_72B: "accounts/fireworks/models/qwen2p5-72b-instruct", +} + +/** + * Map of Together model identifiers to their API names + * @type {Record} + */ +export const TOGETHER_MODELS: Record = { + LLAMA_3_2_3B: "meta-llama/Llama-3.2-3B-Instruct-Turbo", + LLAMA_3_1_405B: "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo", + LLAMA_3_1_70B: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", + LLAMA_3_1_8B: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + GEMMA_2_27B: "google/gemma-2-27b-it", + GEMMA_2_9B: "google/gemma-2-9b-it", + QWEN_2_5_72B: "Qwen/Qwen2.5-72B-Instruct-Turbo", + QWEN_2_5_7B: "Qwen/Qwen2.5-7B-Instruct-Turbo", +} + /** * Map of local model identifiers to their filenames and URLs * @type {Record} diff --git a/src/transcription/deepgram.ts b/src/transcription/deepgram.ts index 0b7950f..14e5fd8 100644 --- a/src/transcription/deepgram.ts +++ b/src/transcription/deepgram.ts @@ -3,41 +3,7 @@ import { writeFile, readFile } from 'node:fs/promises' import { env } from 'node:process' import { log, wait } from '../models.js' -import type { ProcessingOptions } from '../types.js' - -// Define types for Deepgram API response -type DeepgramResponse = { - metadata: { - transaction_key: string - request_id: string - sha256: string - created: string - duration: number - channels: number - models: string[] - model_info: { - [key: string]: { - name: string - version: string - arch: string - } - } - } - results: { - channels: Array<{ - alternatives: Array<{ - transcript: string - confidence: number - words: Array<{ - word: string - start: number - end: number - confidence: number - }> - }> - }> - } -} +import type { ProcessingOptions, DeepgramResponse } from '../types.js' /** * Main function to handle transcription using Deepgram API. diff --git a/src/types.ts b/src/types.ts index 2c68cd8..23b856c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -326,11 +326,81 @@ export type GeminiModelType = 'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO' export type MistralModelType = 'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO' /** Define available OctoAI models. */ export type OctoModelType = 'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B' +/** Define available Fireworks models. */ +export type FireworksModelType = 'LLAMA_3_1_405B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_8B' | 'LLAMA_3_2_3B' | 'LLAMA_3_2_1B' | 'QWEN_2_5_72B' +/** Define available Together models. */ +export type TogetherModelType = 'LLAMA_3_2_3B' | 'LLAMA_3_1_405B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_8B' | 'GEMMA_2_27B' | 'GEMMA_2_9B' | 'QWEN_2_5_72B' | 'QWEN_2_5_7B' /** Define local model configurations. */ export type LlamaModelType = 'QWEN_2_5_1B' | 'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA_2_2B' /** Define local model with Ollama. 
diff --git a/src/types.ts b/src/types.ts
index 2c68cd8..23b856c 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -326,11 +326,81 @@ export type GeminiModelType = 'GEMINI_1_5_FLASH' | 'GEMINI_1_5_PRO'
 export type MistralModelType = 'MIXTRAL_8x7b' | 'MIXTRAL_8x22b' | 'MISTRAL_LARGE' | 'MISTRAL_NEMO'
 /** Define available OctoAI models. */
 export type OctoModelType = 'LLAMA_3_1_8B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_405B' | 'MISTRAL_7B' | 'MIXTRAL_8X_7B' | 'NOUS_HERMES_MIXTRAL_8X_7B' | 'WIZARD_2_8X_22B'
+/** Define available Fireworks models. */
+export type FireworksModelType = 'LLAMA_3_1_405B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_8B' | 'LLAMA_3_2_3B' | 'LLAMA_3_2_1B' | 'QWEN_2_5_72B'
+/** Define available Together models. */
+export type TogetherModelType = 'LLAMA_3_2_3B' | 'LLAMA_3_1_405B' | 'LLAMA_3_1_70B' | 'LLAMA_3_1_8B' | 'GEMMA_2_27B' | 'GEMMA_2_9B' | 'QWEN_2_5_72B' | 'QWEN_2_5_7B'
 /** Define local model configurations. */
 export type LlamaModelType = 'QWEN_2_5_1B' | 'QWEN_2_5_3B' | 'PHI_3_5' | 'LLAMA_3_2_1B' | 'GEMMA_2_2B'
 /** Define local model with Ollama. */
 export type OllamaModelType = 'LLAMA_3_2_1B' | 'LLAMA_3_2_3B' | 'GEMMA_2_2B' | 'PHI_3_5' | 'QWEN_2_5_1B' | 'QWEN_2_5_3B'
 
+export type FireworksResponse = {
+  id: string
+  object: string
+  created: number
+  model: string
+  prompt: any[]
+  choices: {
+    finish_reason: string
+    index: number
+    message: {
+      role: string
+      content: string
+      tool_calls: {
+        id: string
+        type: string
+        function: {
+          name: string
+          arguments: string
+        }
+      }[]
+    }
+  }[]
+  usage: {
+    prompt_tokens: number
+    completion_tokens: number
+    total_tokens: number
+  }
+}
+
+export type TogetherResponse = {
+  id: string
+  object: string
+  created: number
+  model: string
+  prompt: any[]
+  choices: {
+    text: string
+    finish_reason: string
+    seed: number
+    index: number
+    message: {
+      role: string
+      content: string
+      tool_calls: {
+        index: number
+        id: string
+        type: string
+        function: {
+          name: string
+          arguments: string
+        }
+      }[]
+    }
+    logprobs: {
+      token_ids: number[]
+      tokens: string[]
+      token_logprobs: number[]
+    }
+  }[]
+  usage: {
+    prompt_tokens: number
+    completion_tokens: number
+    total_tokens: number
+  }
+}
+
 // Define the expected structure of the response from Ollama API
 export type OllamaResponse = {
   model: string
@@ -367,6 +437,40 @@ export type OllamaTagsResponse = {
   }>
 }
 
+// Define types for Deepgram API response
+export type DeepgramResponse = {
+  metadata: {
+    transaction_key: string
+    request_id: string
+    sha256: string
+    created: string
+    duration: number
+    channels: number
+    models: string[]
+    model_info: {
+      [key: string]: {
+        name: string
+        version: string
+        arch: string
+      }
+    }
+  }
+  results: {
+    channels: Array<{
+      alternatives: Array<{
+        transcript: string
+        confidence: number
+        words: Array<{
+          word: string
+          start: number
+          end: number
+          confidence: number
+        }>
+      }>
+    }>
+  }
+}
+
 /**
  * Represents the function signature for cleaning up temporary files.
  */
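
The FireworksResponse and TogetherResponse shapes above make the chat-completion payloads self-describing. A rough sketch of how a caller might read one, assuming a parsed FireworksResponse; the summary helper is hypothetical, the only access the patch itself performs is data.choices[0]?.message?.content:

    import type { FireworksResponse } from '../types.js'

    // Illustrative sketch: pull the generated text and token accounting out of a response.
    function summarizeFireworksResponse(data: FireworksResponse): string {
      const content = data.choices[0]?.message?.content ?? ''
      const { prompt_tokens, completion_tokens, total_tokens } = data.usage
      return `${content}\n\n(tokens: ${prompt_tokens} prompt + ${completion_tokens} completion = ${total_tokens} total)`
    }
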
From 659557b383b9cb86a9b02afa9f548afba607ed9e Mon Sep 17 00:00:00 2001
From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com>
Date: Tue, 15 Oct 2024 03:12:16 -0500
Subject: [PATCH 09/10] add whisper diarization option

---
 .gitignore                              |  4 +-
 scripts/setup-python.sh                 | 17 +++++
 src/autoshow.ts                         |  1 +
 src/models.ts                           |  2 +-
 src/transcription/whisperDiarization.ts | 90 +++++++++++++++++++++++++
 src/transcription/whisperPython.ts      | 10 +--
 src/types.ts                            |  4 +-
 src/utils/runTranscription.ts           |  4 ++
 8 files changed, 124 insertions(+), 8 deletions(-)
 create mode 100755 scripts/setup-python.sh
 create mode 100644 src/transcription/whisperDiarization.ts

diff --git a/.gitignore b/.gitignore
index ccb1788..8720a32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ node_modules
 .DS_Store
 /content
 whisper.cpp
+whisper-diarization
 package-lock.json
 .env
 src/llms/models
@@ -12,4 +13,5 @@ out
 types
 dist
 NEW.md
-TODO.md
\ No newline at end of file
+TODO.md
+nemo_msdd_configs
\ No newline at end of file
diff --git a/scripts/setup-python.sh b/scripts/setup-python.sh
new file mode 100755
index 0000000..634719b
--- /dev/null
+++ b/scripts/setup-python.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# Clone the repository
+git clone https://github.com/MahmoudAshraf97/whisper-diarization.git
+
+# Create and activate virtual environment
+python3.12 -m venv whisper-diarization/venv
+source whisper-diarization/venv/bin/activate
+
+# Install the requirements
+pip install -c whisper-diarization/constraints.txt -r whisper-diarization/requirements.txt
+
+echo "Setup complete. To activate this environment in the future, run:"
+echo "source whisper-diarization/venv/bin/activate"
+echo ""
+echo "To deactivate this environment, run:"
+echo "deactivate"
\ No newline at end of file
diff --git a/src/autoshow.ts b/src/autoshow.ts
index 53be20e..625f68c 100644
--- a/src/autoshow.ts
+++ b/src/autoshow.ts
@@ -46,6 +46,7 @@ program
   .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
   .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification')
   .option('--whisperPython [model]', 'Use openai-whisper for transcription with optional model specification')
+  .option('--whisperDiarization [model]', 'Use whisper-diarization for transcription with optional model specification')
   .option('--deepgram', 'Use Deepgram for transcription')
   .option('--assembly', 'Use AssemblyAI for transcription')
   .option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
diff --git a/src/models.ts b/src/models.ts
index cc452ee..97942dc 100644
--- a/src/models.ts
+++ b/src/models.ts
@@ -15,7 +15,7 @@ export const log: typeof console.log = console.log
 
 export const ACTION_OPTIONS = ['video', 'playlist', 'urls', 'file', 'rss']
 export const LLM_OPTIONS = ['chatgpt', 'claude', 'cohere', 'mistral', 'octo', 'llama', 'ollama', 'gemini', 'fireworks', 'together']
-export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'whisperPython', 'deepgram', 'assembly']
+export const TRANSCRIPT_OPTIONS = ['whisper', 'whisperDocker', 'whisperPython', 'whisperDiarization', 'deepgram', 'assembly']
 
 /**
  * Define available Whisper models for whisper.cpp
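
TRANSCRIPT_OPTIONS now advertises whisperDiarization alongside the existing services. A small sketch of validating a requested service against that list, assuming the imports resolve from a sibling module; the helper is hypothetical and not a function in the repository:

    import { TRANSCRIPT_OPTIONS } from '../models.js'
    import type { TranscriptServices } from '../types.js'

    // Illustrative sketch: narrow an arbitrary string to a known transcription service.
    function asTranscriptService(value: string): TranscriptServices {
      if (!TRANSCRIPT_OPTIONS.includes(value)) {
        throw new Error(`Unknown transcription service: ${value}`)
      }
      return value as TranscriptServices
    }
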
diff --git a/src/transcription/whisperDiarization.ts b/src/transcription/whisperDiarization.ts
new file mode 100644
index 0000000..0c78570
--- /dev/null
+++ b/src/transcription/whisperDiarization.ts
@@ -0,0 +1,90 @@
+// src/transcription/whisperDiarization.ts
+
+import { readFile, writeFile, unlink } from 'node:fs/promises'
+import { exec } from 'node:child_process'
+import { promisify } from 'node:util'
+// import { existsSync } from 'node:fs'
+import { log, wait } from '../models.js'
+import type { ProcessingOptions } from '../types.js'
+import { WHISPER_PYTHON_MODELS } from '../models.js'
+
+const execPromise = promisify(exec)
+
+/**
+ * Main function to handle transcription using the whisper-diarization Python project.
+ * @param {ProcessingOptions} options - Additional processing options.
+ * @param {string} finalPath - The base path for the files.
+ * @returns {Promise<string>} - Returns the formatted transcript content.
+ * @throws {Error} - If an error occurs during transcription.
+ */
+export async function callWhisperDiarization(options: ProcessingOptions, finalPath: string): Promise<string> {
+  log(wait('\n Using whisper-diarization for transcription...'))
+
+  try {
+    // Get the whisper model from options or use 'base' as default
+    let whisperModel: string = 'base'
+    if (typeof options.whisperDiarization === 'string') {
+      whisperModel = options.whisperDiarization
+    } else if (options.whisperDiarization !== true) {
+      throw new Error('Invalid whisperDiarization option')
+    }
+
+    if (!(whisperModel in WHISPER_PYTHON_MODELS)) {
+      throw new Error(`Unknown model type: ${whisperModel}`)
+    }
+
+    log(wait(`\n - whisperModel: ${whisperModel}`))
+
+    // // Check if ffmpeg is installed
+    // try {
+    //   await execPromise('ffmpeg -version')
+    // } catch (error) {
+    //   throw new Error('ffmpeg is not installed or not available in PATH')
+    // }
+
+    // // Check if Python is installed
+    // try {
+    //   await execPromise('python3 --version')
+    // } catch (error) {
+    //   throw new Error('Python is not installed or not available in PATH')
+    // }
+
+    // // Check if the whisper-diarization repo is cloned
+    // if (!existsSync('./whisper-diarization')) {
+    //   log(`\n No whisper-diarization repo found, running git clone...\n`)
+    //   await execPromise('git clone https://github.com/MahmoudAshraf97/whisper-diarization.git')
+    //   log(`\n - whisper-diarization clone complete.\n`)
+    // }
+
+    // Prepare the command to run the transcription
+    const command = `python whisper-diarization/diarize.py -a ${finalPath}.wav --whisper-model ${whisperModel}`
+
+    log(wait(`\n Running transcription with command:\n ${command}\n`))
+
+    // Execute the command
+    await execPromise(command)
+
+    await unlink(`${finalPath}.txt`)
+    log(wait(`\n Extra TXT file deleted:\n - ${finalPath}.txt\n`))
+
+    // Read the generated transcript file
+    const transcriptContent = await readFile(`${finalPath}.srt`, 'utf8')
+
+    // Write the transcript to the expected output file
+    await writeFile(`${finalPath}.txt`, transcriptContent)
+
+    // Create an empty LRC file to prevent cleanup errors and unlink SRT file
+    await writeFile(`${finalPath}.lrc`, '')
+    log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`))
+    await unlink(`${finalPath}.srt`)
+    log(wait(`\n SRT file deleted:\n - ${finalPath}.srt\n`))
+
+    log(wait(`\n Transcript successfully completed:\n - ${finalPath}.txt\n`))
+
+    return transcriptContent
+
+  } catch (error) {
+    console.error('Error in callWhisperDiarization:', (error as Error).message)
+    process.exit(1)
+  }
+}
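
For reference, here is roughly what the template literal in callWhisperDiarization produces; the base path and model below are made-up sample values:

    // Illustrative sketch: sample inputs for the command built above.
    const finalPath = 'content/2024-10-15-episode'  // hypothetical base path
    const whisperModel = 'medium'                    // hypothetical model choice
    const command = `python whisper-diarization/diarize.py -a ${finalPath}.wav --whisper-model ${whisperModel}`
    // -> python whisper-diarization/diarize.py -a content/2024-10-15-episode.wav --whisper-model medium
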
diff --git a/src/transcription/whisperPython.ts b/src/transcription/whisperPython.ts
index 8667f1e..aa3de8b 100644
--- a/src/transcription/whisperPython.ts
+++ b/src/transcription/whisperPython.ts
@@ -60,7 +60,7 @@ export async function callWhisperPython(options: ProcessingOptions, finalPath: s
     }
 
     // Prepare the command to run the transcription
-    const command = `whisper "${finalPath}.wav" --model ${whisperModel} --output_dir "content" --output_format vtt --language en --word_timestamps True`
+    const command = `whisper "${finalPath}.wav" --model ${whisperModel} --output_dir "content" --output_format srt --language en --word_timestamps True`
 
     log(wait(`\n Running transcription with command:\n ${command}\n`))
 
@@ -68,16 +68,16 @@ export async function callWhisperPython(options: ProcessingOptions, finalPath: s
     await execPromise(command)
 
     // Read the generated transcript file
-    const transcriptContent = await readFile(`${finalPath}.vtt`, 'utf8')
+    const transcriptContent = await readFile(`${finalPath}.srt`, 'utf8')
 
     // Write the transcript to the expected output file
     await writeFile(`${finalPath}.txt`, transcriptContent)
 
-    // Create an empty LRC file to prevent cleanup errors and unlink VTT file
+    // Create an empty LRC file to prevent cleanup errors and unlink SRT file
     await writeFile(`${finalPath}.lrc`, '')
     log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`))
-    await unlink(`${finalPath}.vtt`)
-    log(wait(`\n VTT file deleted:\n - ${finalPath}.vtt\n`))
+    await unlink(`${finalPath}.srt`)
+    log(wait(`\n SRT file deleted:\n - ${finalPath}.srt\n`))
 
     log(wait(`\n Transcript successfully completed:\n - ${finalPath}.txt\n`))
 
diff --git a/src/types.ts b/src/types.ts
index 23b856c..fc2ab85 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -28,6 +28,8 @@ export type ProcessingOptions = {
   whisper?: WhisperModelType
   /** The Whisper Python model to use (e.g., 'tiny', 'base'). */
   whisperPython?: WhisperModelType
+  /** The Whisper Diarization model to use (e.g., 'tiny', 'base'). */
+  whisperDiarization?: WhisperModelType
   /** The Whisper model to use with Docker (e.g., 'tiny', 'base'). */
   whisperDocker?: WhisperModelType
   /** Flag to use Deepgram for transcription. */
@@ -237,7 +239,7 @@ export type SupportedFileType = 'wav' | 'mp3' | 'm4a' | 'aac' | 'ogg' | 'flac' |
  * - deepgram: Use Deepgram's transcription service.
  * - assembly: Use AssemblyAI's transcription service.
  */
-export type TranscriptServices = 'whisper' | 'whisperDocker' | 'whisperPython' | 'deepgram' | 'assembly'
+export type TranscriptServices = 'whisper' | 'whisperDocker' | 'whisperPython' | 'whisperDiarization' | 'deepgram' | 'assembly'
 
 /**
  * Represents the available Whisper model types.
diff --git a/src/utils/runTranscription.ts b/src/utils/runTranscription.ts
index a5cf2dc..21099de 100644
--- a/src/utils/runTranscription.ts
+++ b/src/utils/runTranscription.ts
@@ -3,6 +3,7 @@
 import { callWhisper } from '../transcription/whisper.js'
 import { callWhisperPython } from '../transcription/whisperPython.js'
 import { callWhisperDocker } from '../transcription/whisperDocker.js'
+import { callWhisperDiarization } from '../transcription/whisperDiarization.js'
 import { callDeepgram } from '../transcription/deepgram.js'
 import { callAssembly } from '../transcription/assembly.js'
 import { log, step } from '../models.js'
@@ -40,6 +41,9 @@ export async function runTranscription(
       case 'whisperPython':
         await callWhisperPython(options, finalPath)
         break
+      case 'whisperDiarization':
+        await callWhisperDiarization(options, finalPath)
+        break
       default:
         throw new Error(`Unknown transcription service: ${transcriptServices}`)
     }
From 9c8b6e025e0a3b1a8de7b77eb0584b831a1c4272 Mon Sep 17 00:00:00 2001
From: Anthony Campolo <12433465+ajcwebdev@users.noreply.github.com>
Date: Tue, 15 Oct 2024 03:28:51 -0500
Subject: [PATCH 10/10] format srt files

---
 src/transcription/whisperDiarization.ts | 49 +++++++++++++++++++------
 src/transcription/whisperPython.ts      | 46 ++++++++++++++++++-----
 2 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/src/transcription/whisperDiarization.ts b/src/transcription/whisperDiarization.ts
index 0c78570..0b42c4b 100644
--- a/src/transcription/whisperDiarization.ts
+++ b/src/transcription/whisperDiarization.ts
@@ -64,24 +64,49 @@ export async function callWhisperDiarization(options: ProcessingOptions, finalPa
     // Execute the command
     await execPromise(command)
 
-    await unlink(`${finalPath}.txt`)
-    log(wait(`\n Extra TXT file deleted:\n - ${finalPath}.txt\n`))
-
-    // Read the generated transcript file
-    const transcriptContent = await readFile(`${finalPath}.srt`, 'utf8')
-
-    // Write the transcript to the expected output file
-    await writeFile(`${finalPath}.txt`, transcriptContent)
-
+    // Read the generated transcript file
+    const srtContent = await readFile(`${finalPath}.srt`, 'utf8')
+
+    // Process and format the SRT content
+    const blocks = srtContent.split('\n\n')
+
+    const txtContent = blocks
+      .map(block => {
+        const lines = block.split('\n').filter(line => line.trim() !== '')
+        if (lines.length >= 2) {
+          // lines[0] is the sequence number
+          // lines[1] is the timestamp line
+          // lines[2...] are the subtitle text lines
+          const timestampLine = lines[1]
+          const textLines = lines.slice(2)
+          const match = timestampLine.match(/(\d{2}):(\d{2}):(\d{2}),\d{3}/)
+          if (match) {
+            const hours = parseInt(match[1], 10)
+            const minutes = parseInt(match[2], 10)
+            const seconds = match[3]
+            const totalMinutes = hours * 60 + minutes
+            const timestamp = `[${String(totalMinutes).padStart(2, '0')}:${seconds}]`
+            const text = textLines.join(' ')
+            return `${timestamp} ${text}`
+          }
+        }
+        return null
+      })
+      .filter(line => line !== null)
+      .join('\n')
+
+    // Write the formatted content to a text file
+    await writeFile(`${finalPath}.txt`, txtContent)
+    log(wait(`\n Transcript transformation successfully completed...\n - ${finalPath}.txt\n`))
+
     // Create an empty LRC file to prevent cleanup errors and unlink SRT file
     await writeFile(`${finalPath}.lrc`, '')
     log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`))
     await unlink(`${finalPath}.srt`)
     log(wait(`\n SRT file deleted:\n - ${finalPath}.srt\n`))
-
-    log(wait(`\n Transcript successfully completed:\n - ${finalPath}.txt\n`))
-
-    return transcriptContent
+
+    // Return the processed content
+    return txtContent
 
   } catch (error) {
     console.error('Error in callWhisperDiarization:', (error as Error).message)
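
To make the SRT-to-text transformation concrete, here is what the .map() callback above produces for a single cue; the cue text is invented for illustration, the format is standard SRT:

    // Illustrative sketch: one SRT block in...
    const block = '12\n01:01:05,320 --> 01:01:08,110\nWelcome back to the show.'
    // ...and the line returned for it:
    //   '[61:05] Welcome back to the show.'
    // Hours are folded into minutes (1 h 1 min becomes 61) and milliseconds are dropped,
    // so each cue keeps a compact [MM:SS]-style prefix in the plain-text transcript.
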
diff --git a/src/transcription/whisperPython.ts b/src/transcription/whisperPython.ts
index aa3de8b..6472f3d 100644
--- a/src/transcription/whisperPython.ts
+++ b/src/transcription/whisperPython.ts
@@ -68,20 +68,48 @@ export async function callWhisperPython(options: ProcessingOptions, finalPath: s
     await execPromise(command)
 
     // Read the generated transcript file
-    const transcriptContent = await readFile(`${finalPath}.srt`, 'utf8')
-
-    // Write the transcript to the expected output file
-    await writeFile(`${finalPath}.txt`, transcriptContent)
-
+    const srtContent = await readFile(`${finalPath}.srt`, 'utf8')
+
+    // Process and format the SRT content
+    const blocks = srtContent.split('\n\n')
+
+    const txtContent = blocks
+      .map(block => {
+        const lines = block.split('\n').filter(line => line.trim() !== '')
+        if (lines.length >= 2) {
+          // lines[0] is the sequence number
+          // lines[1] is the timestamp line
+          // lines[2...] are the subtitle text lines
+          const timestampLine = lines[1]
+          const textLines = lines.slice(2)
+          const match = timestampLine.match(/(\d{2}):(\d{2}):(\d{2}),\d{3}/)
+          if (match) {
+            const hours = parseInt(match[1], 10)
+            const minutes = parseInt(match[2], 10)
+            const seconds = match[3]
+            const totalMinutes = hours * 60 + minutes
+            const timestamp = `[${String(totalMinutes).padStart(2, '0')}:${seconds}]`
+            const text = textLines.join(' ')
+            return `${timestamp} ${text}`
+          }
+        }
+        return null
+      })
+      .filter(line => line !== null)
+      .join('\n')
+
+    // Write the formatted content to a text file
+    await writeFile(`${finalPath}.txt`, txtContent)
+    log(wait(`\n Transcript transformation successfully completed...\n - ${finalPath}.txt\n`))
+
     // Create an empty LRC file to prevent cleanup errors and unlink SRT file
     await writeFile(`${finalPath}.lrc`, '')
     log(wait(`\n Empty LRC file created:\n - ${finalPath}.lrc\n`))
     await unlink(`${finalPath}.srt`)
     log(wait(`\n SRT file deleted:\n - ${finalPath}.srt\n`))
-
-    log(wait(`\n Transcript successfully completed:\n - ${finalPath}.txt\n`))
-
-    return transcriptContent
+
+    // Return the processed content
+    return txtContent
 
   } catch (error) {
     console.error('Error in callWhisperPython:', (error as Error).message)