Merge pull request #39 from ajcwebdev/next

Add Channel Option
ajcwebdev · Nov 1, 2024 · 1060b56 · 1060b56
2 parents 144aac9 + d036493
commit 1060b56
Show file tree

Hide file tree

Showing 16 changed files with 985 additions and 403 deletions.
diff --git a/docs/examples.md b/docs/examples.md
@@ -60,6 +60,13 @@ npm run as -- \
   --info
 ```
 
+### Process All Videos from a YouTube Channel
+
+```bash
+npm run as -- \
+  --channel "https://www.youtube.com/@ajcwebdev"
+```
+
 ### Process Multiple Videos Specified in a URLs File
 
 Run on an arbitrary list of URLs in `example-urls.md`.
@@ -648,14 +655,14 @@ npm run prune
 ### Bun
 
 ```bash
-bun bun-as -- \
+npm run bun -- \
   --video "https://www.youtube.com/watch?v=MORMZXEaONk"
 ```
 
 ### Deno
 
 ```bash
-deno task deno-as \
+npm run deno -- \
   --video "https://www.youtube.com/watch?v=MORMZXEaONk"
 ```
 
@@ -667,7 +674,7 @@ Integration test.
 - Mostly uses transcripts of videos around one minute long and cheaper models when possible, so the total cost of running this for any given service should be at most only a few cents.
 
 ```bash
-npm run test-all
+npm run test-integrations
 ```
 
 Local services test, only uses Whisper for transcription and Ollama for LLM operations.

diff --git a/package.json b/package.json
@@ -20,33 +20,37 @@
     "autoshow": "./dist/autoshow.js"
   },
   "scripts": {
+    "tsx:base": "tsx --env-file=.env --no-warnings",
     "setup": "bash ./scripts/setup.sh",
     "setup-python": "bash ./scripts/setup-python.sh",
-    "autoshow": "tsx --env-file=.env --no-warnings src/autoshow.ts",
-    "as": "tsx --env-file=.env --no-warnings src/autoshow.ts",
+    "autoshow": "npm run tsx:base -- src/autoshow.ts",
+    "as": "npm run tsx:base -- src/autoshow.ts",
+    "v": "npm run tsx:base -- src/autoshow.ts --video",
+    "u": "npm run tsx:base -- src/autoshow.ts --urls",
+    "urls": "npm run tsx:base -- src/autoshow.ts --urls content/urls.md",
+    "p": "npm run tsx:base -- src/autoshow.ts --playlist",
+    "f": "npm run tsx:base -- src/autoshow.ts --file",
+    "r": "npm run tsx:base -- src/autoshow.ts --rss",
+    "rss-info": "npm run tsx:base -- src/autoshow.ts --info --rss",
+    "info": "npm run tsx:base -- src/autoshow.ts --info",
+    "last2": "npm run tsx:base -- src/autoshow.ts --last 2 --rss",
+    "last3": "npm run tsx:base -- src/autoshow.ts --last 3 --rss",
     "docker": "docker compose run --remove-orphans --rm autoshow --whisperDocker",
     "docker-up": "docker compose up --build -d --remove-orphans --no-start",
     "ds": "docker compose images && docker compose ls",
     "prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
-    "v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --video",
-    "u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --urls",
-    "p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --playlist",
-    "f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --file",
-    "r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --rss",
-    "last2": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 2 --rss",
-    "last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 3 --rss",
-    "serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts",
-    "fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts",
-    "fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts",
+    "serve": "npm run tsx:base -- --watch packages/server/index.ts",
+    "fetch-local": "npm run tsx:base -- packages/server/tests/fetch-local.ts",
+    "fetch-all": "npm run tsx:base -- packages/server/tests/fetch-all.ts",
     "t": "npm run test-local",
     "bench": "tsx --test test/bench.test.ts",
     "test-bench": "tsx --test test/bench.test.ts",
     "test-local": "tsx --test test/local.test.ts",
     "test-docker": "tsx --test test/docker.test.ts",
     "test-integrations": "tsx --test test/integrations.test.ts",
     "clean": "tsx scripts/cleanContent.ts",
-    "bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts",
-    "deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts"
+    "bun": "bun --env-file=.env --no-warnings src/autoshow.ts",
+    "deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/autoshow.ts"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "0.30.1",

diff --git a/scripts/cleanContent.ts b/scripts/cleanContent.ts
@@ -1,7 +1,7 @@
 // scripts/cleanContent.ts
 
-import { exec } from 'child_process'
-import { promisify } from 'util'
+import { exec } from 'node:child_process'
+import { promisify } from 'node:util'
 import { l, err } from '../src/globals.js'
 
 const execAsync = promisify(exec)

diff --git a/src/autoshow.ts b/src/autoshow.ts
@@ -15,6 +15,7 @@ import { Command } from 'commander'
 import { handleInteractivePrompt } from './interactive.js'
 import { processVideo } from './commands/processVideo.js'
 import { processPlaylist } from './commands/processPlaylist.js'
+import { processChannel } from './commands/processChannel.js'
 import { processURLs } from './commands/processURLs.js'
 import { processFile } from './commands/processFile.js'
 import { processRSS } from './commands/processRSS.js'
@@ -38,6 +39,7 @@ program
   // Input source options
   .option('-v, --video <url>', 'Process a single YouTube video')
   .option('-p, --playlist <playlistUrl>', 'Process all videos in a YouTube playlist')
+  .option('-c, --channel <channelUrl>', 'Process all videos in a YouTube channel')
   .option('-u, --urls <filePath>', 'Process YouTube videos from a list of URLs in a file')
   .option('-f, --file <filePath>', 'Process a local audio or video file')
   .option('-r, --rss <rssURL>', 'Process a podcast RSS feed')
@@ -46,7 +48,7 @@ program
   .option('--order <order>', 'Specify the order for RSS feed processing (newest or oldest)')
   .option('--skip <number>', 'Number of items to skip when processing RSS feed', parseInt)
   .option('--last <number>', 'Number of most recent items to process (overrides --order and --skip)', parseInt)
-  .option('--info', 'Generate JSON file with RSS feed information instead of processing items')
+  .option('--info', 'Generate JSON file with RSS feed or channel information instead of processing items')
   // Transcription service options
   .option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
   .option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification')
@@ -56,15 +58,15 @@ program
   .option('--assembly', 'Use AssemblyAI for transcription')
   .option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
   // LLM service options
+  .option('--ollama [model]', 'Use Ollama for processing with optional model specification')
   .option('--chatgpt [model]', 'Use ChatGPT for processing with optional model specification')
   .option('--claude [model]', 'Use Claude for processing with optional model specification')
+  .option('--gemini [model]', 'Use Gemini for processing with optional model specification')
   .option('--cohere [model]', 'Use Cohere for processing with optional model specification')
   .option('--mistral [model]', 'Use Mistral for processing')
   .option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification')
   .option('--together [model]', 'Use Together AI for processing with optional model specification')
   .option('--groq [model]', 'Use Groq for processing with optional model specification')
-  .option('--ollama [model]', 'Use Ollama for processing with optional model specification')
-  .option('--gemini [model]', 'Use Gemini for processing with optional model specification')
   // Utility options
   .option('--prompt <sections...>', 'Specify prompt sections to include')
   .option('--noCleanUp', 'Do not delete intermediary files after processing')
@@ -75,6 +77,7 @@ program
 Examples:
   $ autoshow --video "https://www.youtube.com/watch?v=..."
   $ autoshow --playlist "https://www.youtube.com/playlist?list=..."
+  $ autoshow --channel "https://www.youtube.com/channel/..."
   $ autoshow --file "content/audio.mp3"
   $ autoshow --rss "https://feeds.transistor.fm/fsjam-podcast/"
 
@@ -99,6 +102,7 @@ program.action(async (options: ProcessingOptions) => {
   const PROCESS_HANDLERS: Record<string, HandlerFunction> = {
     video: processVideo,
     playlist: processPlaylist,
+    channel: processChannel,
     urls: processURLs,
     file: processFile,
     rss: processRSS,
@@ -142,20 +146,17 @@ program.action(async (options: ProcessingOptions) => {
         llmServices,
         transcriptServices
       )
-      // Log success message
       l(final(`\n================================================================================================`))
       l(final(`  ${action} Processing Completed Successfully.`))
       l(final(`================================================================================================\n`))
       exit(0)
     } catch (error) {
-      // Log error and exit if processing fails
       err(`Error processing ${action}:`, (error as Error).message)
       exit(1)
     }
   }
 })
 
-// Set up error handling for unknown commands
 program.on('command:*', function () {
   err(`Error: Invalid command '${program.args.join(' ')}'. Use --help to see available commands.`)
   exit(1)

diff --git a/src/commands/processChannel.ts b/src/commands/processChannel.ts
@@ -0,0 +1,173 @@
+// src/commands/processChannel.ts
+
+/**
+ * @file Processes an entire YouTube channel, handling metadata extraction and individual video processing.
+ * @packageDocumentation
+ */
+
+import { writeFile } from 'node:fs/promises'
+import { processVideo } from './processVideo.js'
+import { l, err, opts, success, execFilePromise } from '../globals.js'
+import type {
+  LLMServices, TranscriptServices, ProcessingOptions, VideoMetadata,
+} from '../types.js'
+
+/**
+ * Maximum number of yt-dlp metadata lookups that run at the same time for `--info`.
+ * A channel can list thousands of videos, so launching one child process per video
+ * all at once risks exhausting process and file-descriptor limits.
+ */
+const METADATA_CONCURRENCY = 5
+
+/**
+ * Converts an unknown thrown value into a printable message.
+ *
+ * @param error - Value caught by a `catch` clause.
+ * @returns The `message` of an `Error`, otherwise the value's string form.
+ */
+function toErrorMessage(error: unknown): string {
+  return error instanceof Error ? error.message : String(error)
+}
+
+/**
+ * Extracts metadata for a single video by running yt-dlp with one `--print` per field.
+ *
+ * @param url - URL of the video to inspect.
+ * @returns The video's metadata, or `null` when yt-dlp fails or returns an incomplete set of fields.
+ */
+async function fetchVideoMetadata(url: string): Promise<VideoMetadata | null> {
+  try {
+    const { stdout } = await execFilePromise('yt-dlp', [
+      '--restrict-filenames',
+      '--print', '%(webpage_url)s',
+      '--print', '%(channel)s',
+      '--print', '%(uploader_url)s',
+      '--print', '%(title)s',
+      '--print', '%(upload_date>%Y-%m-%d)s',
+      '--print', '%(thumbnail)s',
+      url,
+    ])
+
+    // Each --print emits one line, in the order requested above
+    const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout
+      .trim()
+      .split(/\r?\n/)
+
+    // Validate that all required metadata fields are present
+    if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) {
+      throw new Error('Incomplete metadata received from yt-dlp.')
+    }
+
+    return {
+      showLink,
+      channel,
+      channelURL,
+      title,
+      description: '',
+      publishDate,
+      coverImage,
+    } as VideoMetadata
+  } catch (error) {
+    // Log the failure and return null so the caller can drop this video and keep going
+    err(`Error extracting metadata for ${url}: ${toErrorMessage(error)}`)
+    return null
+  }
+}
+
+/**
+ * Processes an entire YouTube channel by:
+ * 1. Fetching all video URLs from the channel using yt-dlp.
+ * 2. With `--info`, writing metadata for every video to `content/channel_info.json` and
+ *    returning without processing any video.
+ * 3. Otherwise processing each video sequentially with error handling.
+ *
+ * The function continues processing remaining videos even if individual videos fail.
+ *
+ * @param options - Configuration options for processing.
+ * @param channelUrl - URL of the YouTube channel to process.
+ * @param llmServices - Optional language model service for transcript processing.
+ * @param transcriptServices - Optional transcription service for audio conversion.
+ * @returns Promise that resolves when all videos have been processed.
+ * @remarks Does not reject on failure: if the video list cannot be fetched or is empty,
+ * the error is logged and the process exits with code 1.
+ */
+export async function processChannel(
+  options: ProcessingOptions,
+  channelUrl: string,
+  llmServices?: LLMServices,
+  transcriptServices?: TranscriptServices
+): Promise<void> {
+  // Log the processing parameters for debugging purposes
+  l(opts('Parameters passed to processChannel:\n'))
+  l(opts(`  - llmServices: ${llmServices}\n  - transcriptServices: ${transcriptServices}`))
+
+  try {
+    // Extract all video URLs from the channel using yt-dlp
+    const { stdout, stderr } = await execFilePromise('yt-dlp', [
+      '--flat-playlist',
+      '--print', 'url',
+      '--no-warnings',
+      channelUrl,
+    ])
+
+    // Log any warnings from yt-dlp
+    if (stderr) {
+      err(`yt-dlp warnings: ${stderr}`)
+    }
+
+    // One URL per line; tolerate CRLF output and drop blank lines
+    const urls = stdout
+      .split(/\r?\n/)
+      .map((line) => line.trim())
+      .filter(Boolean)
+
+    // Exit if no videos were found in the channel
+    if (urls.length === 0) {
+      err('Error: No videos found in the channel.')
+      process.exit(1)
+    }
+
+    l(opts(`\nFound ${urls.length} videos in the channel...`))
+
+    // If the --info option is provided, extract metadata for all videos
+    if (options.info) {
+      // Look videos up in bounded batches; results stay in channel order
+      const metadataList: Array<VideoMetadata | null> = []
+      for (let start = 0; start < urls.length; start += METADATA_CONCURRENCY) {
+        const batch = urls.slice(start, start + METADATA_CONCURRENCY)
+        metadataList.push(...(await Promise.all(batch.map(fetchVideoMetadata))))
+      }
+
+      // Filter out any null results due to errors
+      const validMetadata = metadataList.filter(
+        (metadata): metadata is VideoMetadata => metadata !== null
+      )
+
+      // Save metadata to a JSON file
+      const jsonContent = JSON.stringify(validMetadata, null, 2)
+      const jsonFilePath = 'content/channel_info.json'
+      await writeFile(jsonFilePath, jsonContent)
+      l(success(`Channel information saved to: ${jsonFilePath}`))
+      return
+    }
+
+    // Process each video sequentially, with error handling for individual videos
+    for (const [index, url] of urls.entries()) {
+      // Visual separator for each video in the console
+      l(opts(`\n================================================================================================`))
+      l(opts(`  Processing video ${index + 1}/${urls.length}: ${url}`))
+      l(opts(`================================================================================================\n`))
+      try {
+        // Process the video using the existing processVideo function
+        await processVideo(options, url, llmServices, transcriptServices)
+      } catch (error) {
+        // Log error but continue processing remaining videos
+        err(`Error processing video ${url}: ${toErrorMessage(error)}`)
+      }
+    }
+  } catch (error) {
+    // Handle fatal errors that prevent channel processing
+    err(`Error processing channel: ${toErrorMessage(error)}`)
+    process.exit(1)
+  }
+}