Skip to content

Commit

Permalink
Merge pull request #35 from ajcwebdev/dev
Browse files Browse the repository at this point in the history
Diarization, Whisper Python, Fireworks, and Together
  • Loading branch information
ajcwebdev authored Oct 17, 2024
2 parents 276abff + 9c8b6e0 commit b226287
Show file tree
Hide file tree
Showing 21 changed files with 983 additions and 223 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ node_modules
.DS_Store
/content
whisper.cpp
whisper-diarization
package-lock.json
.env
src/llms/models
Expand All @@ -12,4 +13,5 @@ out
types
dist
NEW.md
TODO.md
TODO.md
nemo_msdd_configs
36 changes: 36 additions & 0 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,42 @@ npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo NOUS_
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --octo WIZARD_2_8X_22B
```

### Fireworks

```bash
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks
```

Select a Fireworks model:

```bash
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_405B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_70B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_1_8B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_2_3B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks LLAMA_3_2_1B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --fireworks QWEN_2_5_72B
```

### Together

```bash
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together
```

Select a Together model:

```bash
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_2_3B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_405B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_70B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together LLAMA_3_1_8B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together GEMMA_2_27B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together GEMMA_2_9B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together QWEN_2_5_72B
npm run as -- --video "https://www.youtube.com/watch?v=MORMZXEaONk" --together QWEN_2_5_7B
```

### Llama.cpp

```bash
Expand Down
67 changes: 34 additions & 33 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,45 +24,46 @@
"docker": "docker compose run --remove-orphans autoshow",
"docker-up": "docker compose up --build -d --remove-orphans --no-start",
"ds": "docker compose images && docker compose ls",
"v": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --video",
"u": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --urls",
"p": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --playlist",
"f": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --file",
"r": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --rss",
"last3": "node --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --last 3 --rss",
"serve": "node --env-file=.env --no-warnings --watch packages/server/index.ts",
"fetch-local": "node --env-file=.env --no-warnings packages/server/tests/fetch-local.ts",
"fetch-all": "node --env-file=.env --no-warnings packages/server/tests/fetch-all.ts",
"v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --video",
"u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --urls",
"p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --playlist",
"f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --file",
"r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --rss",
"last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --last 3 --rss",
"serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts",
"fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts",
"fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts",
"t": "npm run test-local",
"test-local": "node --test test/local.test.js",
"test-all": "node --test test/all.test.js",
"test-local": "tsx --test test/local.test.js",
"test-all": "tsx --test test/all.test.js",
"clean": "tsx scripts/cleanContent.ts",
"bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts",
"deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.26.0",
"@deepgram/sdk": "^3.5.1",
"@fastify/cors": "^10.0.1",
"@google/generative-ai": "^0.17.1",
"@mistralai/mistralai": "^1.0.2",
"@octoai/sdk": "^1.5.1",
"assemblyai": "^4.6.1",
"chalk": "^5.3.0",
"cohere-ai": "^7.12.0",
"commander": "^12.1.0",
"fast-xml-parser": "^4.4.1",
"fastify": "^5.0.0",
"ffmpeg-static": "^5.2.0",
"file-type": "^19.4.1",
"inquirer": "^10.2.2",
"node-llama-cpp": "^3.1.0",
"ollama": "^0.5.9",
"openai": "^4.55.7"
"@anthropic-ai/sdk": "0.29.0",
"@deepgram/sdk": "3.8.1",
"@fastify/cors": "10.0.1",
"@google/generative-ai": "0.21.0",
"@mistralai/mistralai": "1.1.0",
"@octoai/sdk": "1.11.0",
"assemblyai": "4.7.1",
"chalk": "5.3.0",
"cohere-ai": "7.14.0",
"commander": "12.1.0",
"fast-xml-parser": "4.5.0",
"fastify": "5.0.0",
"ffmpeg-static": "5.2.0",
"file-type": "19.5.0",
"inquirer": "12.0.0",
"node-llama-cpp": "3.1.1",
"ollama": "0.5.9",
"openai": "4.67.3"
},
"devDependencies": {
"@types/inquirer": "^9.0.7",
"@types/node": "^22.7.5",
"tsx": "^4.19.1",
"typescript": "^5.6.2"
"@types/inquirer": "9.0.7",
"@types/node": "22.7.5",
"tsx": "4.19.1",
"typescript": "5.6.3"
}
}
26 changes: 26 additions & 0 deletions scripts/cleanContent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// scripts/cleanContent.ts

import { exec } from 'child_process'
import { promisify } from 'util'

// child_process.exec wrapped in a promise so it can be awaited below.
const execAsync = promisify(exec)

/**
 * Deletes every file under `content` except `.gitkeep`, `audio.mp3`,
 * and `example-urls.md`, using the POSIX `find` utility.
 * Failures are reported to the console rather than thrown.
 */
async function cleanContent() {
  // Shell command doing the actual deletion; the -not \( ... \) group
  // whitelists the files that must survive the cleanup.
  const deleteCommand =
    'find content -type f -not \\( -name ".gitkeep" -o -name "audio.mp3" -o -name "example-urls.md" \\) -delete'
  try {
    const result = await execAsync(deleteCommand)
    // Anything on stderr is treated as a failure of the cleanup.
    if (result.stderr) {
      console.error('Error:', result.stderr)
      return
    }
    console.log('Files deleted successfully')
    // find -delete is normally silent; log stdout only if present.
    if (result.stdout) {
      console.log('Output:', result.stdout)
    }
  } catch (error) {
    console.error('Execution error:', error)
  }
}

// Run immediately when the script is invoked (e.g. via `npm run clean`).
cleanContent()
17 changes: 17 additions & 0 deletions scripts/setup-python.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# scripts/setup-python.sh
#
# Sets up the whisper-diarization transcription environment:
# clones the repository, creates a Python 3.12 virtual environment
# inside it, and installs the pinned requirements.

# Abort on the first failing command so a failed clone or venv
# creation does not cascade into a broken pip install.
set -e

# Clone the repository only if it is not already present,
# so the script is safe to re-run.
if [ -d "whisper-diarization" ]; then
  echo "whisper-diarization directory already exists. Skipping clone."
else
  git clone https://github.com/MahmoudAshraf97/whisper-diarization.git
fi

# Create and activate virtual environment
python3.12 -m venv whisper-diarization/venv
source whisper-diarization/venv/bin/activate

# Install the requirements (the constraints file pins transitive versions)
pip install -c whisper-diarization/constraints.txt -r whisper-diarization/requirements.txt

echo "Setup complete. To activate this environment in the future, run:"
echo "source whisper-diarization/venv/bin/activate"
echo ""
echo "To deactivate this environment, run:"
echo "deactivate"
85 changes: 57 additions & 28 deletions scripts/setup.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# scripts/setup.sh

# Function to check if a command exists
command_exists() {
Expand All @@ -21,45 +22,73 @@ else
echo "yt-dlp is already installed."
fi

# Function to check if Ollama server is running
check_ollama_server() {
if curl -s "http://127.0.0.1:11434" &> /dev/null; then
echo "Ollama server is already running."
else
echo "Ollama server is not running. Starting Ollama server..."
ollama serve > ollama.log 2>&1 &
OLLAMA_PID=$!
echo "Ollama server started with PID $OLLAMA_PID"
sleep 5
fi
}

# Function to check if a model is available, and pull it if not
check_and_pull_model() {
local model=$1
if ollama list | grep -q "$model"; then
echo "Model $model is already available."
else
echo "Model $model is not available. Pulling the model..."
ollama pull "$model"
fi
}

# Check if Ollama is installed
if ! command_exists ollama; then
echo "Ollama is not installed, refer to installation instructions here:"
echo "https://github.com/ollama/ollama"
else
echo "Ollama is installed."
fi

# Check if Ollama server is running
if ! curl -s "http://127.0.0.1:11434" &> /dev/null; then
echo "Ollama server is not running. Starting Ollama server..."
ollama serve > ollama.log 2>&1 &
OLLAMA_PID=$!
echo "Ollama server started with PID $OLLAMA_PID"
sleep 5
else
echo "Ollama server is already running."

# Check if Ollama server is running
check_ollama_server

# Check and pull required models
check_and_pull_model "llama3.2:1b"
fi

# Install npm dependencies
npm i

# Clone whisper.cpp repository
git clone https://github.com/ggerganov/whisper.cpp.git

# Download whisper models
bash ./whisper.cpp/models/download-ggml-model.sh base
bash ./whisper.cpp/models/download-ggml-model.sh large-v2

# Compile whisper.cpp
make -C whisper.cpp

# Copy Dockerfile
cp .github/whisper.Dockerfile whisper.cpp/Dockerfile

# Download Qwen 2.5 1.5B model for Llama.cpp
curl -L "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q6_k.gguf" -o "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf"
# Check if whisper.cpp directory exists
if [ -d "whisper.cpp" ]; then
echo "whisper.cpp directory already exists. Skipping clone and setup."
else
echo "Cloning whisper.cpp repository..."
git clone https://github.com/ggerganov/whisper.cpp.git

# Download whisper models
echo "Downloading whisper models..."
bash ./whisper.cpp/models/download-ggml-model.sh base

# Compile whisper.cpp
echo "Compiling whisper.cpp..."
make -C whisper.cpp

# Copy Dockerfile
echo "Copying Dockerfile..."
cp .github/whisper.Dockerfile whisper.cpp/Dockerfile
fi

# Pull Llama 3.1 1B model using Ollama
ollama pull llama3.2:1b
# Check if Qwen 2.5 1.5B model exists
if [ -f "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf" ]; then
echo "Qwen 2.5 1.5B model already exists. Skipping download."
else
echo "Downloading Qwen 2.5 1.5B model..."
curl -L "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q6_k.gguf" -o "./src/llms/models/qwen2.5-1.5b-instruct-q6_k.gguf"
fi

echo "Setup completed successfully!"
4 changes: 4 additions & 0 deletions src/autoshow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ program
.option('--info', 'Generate JSON file with RSS feed information instead of processing items')
.option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
.option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification')
.option('--whisperPython [model]', 'Use openai-whisper for transcription with optional model specification')
.option('--whisperDiarization [model]', 'Use whisper-diarization for transcription with optional model specification')
.option('--deepgram', 'Use Deepgram for transcription')
.option('--assembly', 'Use AssemblyAI for transcription')
.option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
Expand All @@ -53,6 +55,8 @@ program
.option('--cohere [model]', 'Use Cohere for processing with optional model specification')
.option('--mistral [model]', 'Use Mistral for processing')
.option('--octo [model]', 'Use Octo for processing')
.option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification')
.option('--together [model]', 'Use Together AI for processing with optional model specification')
.option('--llama [model]', 'Use Node Llama for processing with optional model specification')
.option('--ollama [model]', 'Use Ollama for processing with optional model specification')
.option('--gemini [model]', 'Use Gemini for processing with optional model specification')
Expand Down
Loading

0 comments on commit b226287

Please sign in to comment.