Skip to content

Commit

Permalink
Merge pull request #39 from ajcwebdev/next
Browse files Browse the repository at this point in the history
Add Channel Option
  • Loading branch information
ajcwebdev authored Nov 1, 2024
2 parents 144aac9 + d036493 commit 1060b56
Show file tree
Hide file tree
Showing 16 changed files with 985 additions and 403 deletions.
13 changes: 10 additions & 3 deletions docs/examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ npm run as -- \
--info
```

### Process All Videos from a YouTube Channel

```bash
npm run as -- \
--channel "https://www.youtube.com/@ajcwebdev"
```

### Process Multiple Videos Specified in a URLs File

Run on an arbitrary list of URLs in `example-urls.md`.
Expand Down Expand Up @@ -648,14 +655,14 @@ npm run prune
### Bun

```bash
bun bun-as -- \
npm run bun -- \
--video "https://www.youtube.com/watch?v=MORMZXEaONk"
```

### Deno

```bash
deno task deno-as \
npm run deno -- \
--video "https://www.youtube.com/watch?v=MORMZXEaONk"
```

Expand All @@ -667,7 +674,7 @@ Integration test.
- Mostly uses transcripts of videos around one minute long and cheaper models when possible, so the total cost of running this for any given service should be no more than a few cents.

```bash
npm run test-all
npm run test-integrations
```

Local services test, only uses Whisper for transcription and Ollama for LLM operations.
Expand Down
32 changes: 18 additions & 14 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,37 @@
"autoshow": "./dist/autoshow.js"
},
"scripts": {
"tsx:base": "tsx --env-file=.env --no-warnings",
"setup": "bash ./scripts/setup.sh",
"setup-python": "bash ./scripts/setup-python.sh",
"autoshow": "tsx --env-file=.env --no-warnings src/autoshow.ts",
"as": "tsx --env-file=.env --no-warnings src/autoshow.ts",
"autoshow": "npm run tsx:base -- src/autoshow.ts",
"as": "npm run tsx:base -- src/autoshow.ts",
"v": "npm run tsx:base -- src/autoshow.ts --video",
"u": "npm run tsx:base -- src/autoshow.ts --urls",
"urls": "npm run tsx:base -- src/autoshow.ts --urls content/urls.md",
"p": "npm run tsx:base -- src/autoshow.ts --playlist",
"f": "npm run tsx:base -- src/autoshow.ts --file",
"r": "npm run tsx:base -- src/autoshow.ts --rss",
"rss-info": "npm run tsx:base -- src/autoshow.ts --info --rss",
"info": "npm run tsx:base -- src/autoshow.ts --info",
"last2": "npm run tsx:base -- src/autoshow.ts --last 2 --rss",
"last3": "npm run tsx:base -- src/autoshow.ts --last 3 --rss",
"docker": "docker compose run --remove-orphans --rm autoshow --whisperDocker",
"docker-up": "docker compose up --build -d --remove-orphans --no-start",
"ds": "docker compose images && docker compose ls",
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
"v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --video",
"u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --urls",
"p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --playlist",
"f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --file",
"r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --rss",
"last2": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 2 --rss",
"last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 3 --rss",
"serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts",
"fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts",
"fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts",
"serve": "npm run tsx:base -- --watch packages/server/index.ts",
"fetch-local": "npm run tsx:base -- packages/server/tests/fetch-local.ts",
"fetch-all": "npm run tsx:base -- packages/server/tests/fetch-all.ts",
"t": "npm run test-local",
"bench": "tsx --test test/bench.test.ts",
"test-bench": "tsx --test test/bench.test.ts",
"test-local": "tsx --test test/local.test.ts",
"test-docker": "tsx --test test/docker.test.ts",
"test-integrations": "tsx --test test/integrations.test.ts",
"clean": "tsx scripts/cleanContent.ts",
"bun-as": "bun --env-file=.env --no-warnings src/autoshow.ts",
"deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts"
"bun": "bun --env-file=.env --no-warnings src/autoshow.ts",
"deno": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env --unstable-sloppy-imports src/autoshow.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "0.30.1",
Expand Down
4 changes: 2 additions & 2 deletions scripts/cleanContent.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// scripts/cleanContent.ts

import { exec } from 'child_process'
import { promisify } from 'util'
import { exec } from 'node:child_process'
import { promisify } from 'node:util'
import { l, err } from '../src/globals.js'

const execAsync = promisify(exec)
Expand Down
13 changes: 7 additions & 6 deletions src/autoshow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { Command } from 'commander'
import { handleInteractivePrompt } from './interactive.js'
import { processVideo } from './commands/processVideo.js'
import { processPlaylist } from './commands/processPlaylist.js'
import { processChannel } from './commands/processChannel.js'
import { processURLs } from './commands/processURLs.js'
import { processFile } from './commands/processFile.js'
import { processRSS } from './commands/processRSS.js'
Expand All @@ -38,6 +39,7 @@ program
// Input source options
.option('-v, --video <url>', 'Process a single YouTube video')
.option('-p, --playlist <playlistUrl>', 'Process all videos in a YouTube playlist')
.option('-c, --channel <channelUrl>', 'Process all videos in a YouTube channel')
.option('-u, --urls <filePath>', 'Process YouTube videos from a list of URLs in a file')
.option('-f, --file <filePath>', 'Process a local audio or video file')
.option('-r, --rss <rssURL>', 'Process a podcast RSS feed')
Expand All @@ -46,7 +48,7 @@ program
.option('--order <order>', 'Specify the order for RSS feed processing (newest or oldest)')
.option('--skip <number>', 'Number of items to skip when processing RSS feed', parseInt)
.option('--last <number>', 'Number of most recent items to process (overrides --order and --skip)', parseInt)
.option('--info', 'Generate JSON file with RSS feed information instead of processing items')
.option('--info', 'Generate JSON file with RSS feed or channel information instead of processing items')
// Transcription service options
.option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
.option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification')
Expand All @@ -56,15 +58,15 @@ program
.option('--assembly', 'Use AssemblyAI for transcription')
.option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
// LLM service options
.option('--ollama [model]', 'Use Ollama for processing with optional model specification')
.option('--chatgpt [model]', 'Use ChatGPT for processing with optional model specification')
.option('--claude [model]', 'Use Claude for processing with optional model specification')
.option('--gemini [model]', 'Use Gemini for processing with optional model specification')
.option('--cohere [model]', 'Use Cohere for processing with optional model specification')
.option('--mistral [model]', 'Use Mistral for processing')
.option('--fireworks [model]', 'Use Fireworks AI for processing with optional model specification')
.option('--together [model]', 'Use Together AI for processing with optional model specification')
.option('--groq [model]', 'Use Groq for processing with optional model specification')
.option('--ollama [model]', 'Use Ollama for processing with optional model specification')
.option('--gemini [model]', 'Use Gemini for processing with optional model specification')
// Utility options
.option('--prompt <sections...>', 'Specify prompt sections to include')
.option('--noCleanUp', 'Do not delete intermediary files after processing')
Expand All @@ -75,6 +77,7 @@ program
Examples:
$ autoshow --video "https://www.youtube.com/watch?v=..."
$ autoshow --playlist "https://www.youtube.com/playlist?list=..."
$ autoshow --channel "https://www.youtube.com/channel/..."
$ autoshow --file "content/audio.mp3"
$ autoshow --rss "https://feeds.transistor.fm/fsjam-podcast/"
Expand All @@ -99,6 +102,7 @@ program.action(async (options: ProcessingOptions) => {
const PROCESS_HANDLERS: Record<string, HandlerFunction> = {
video: processVideo,
playlist: processPlaylist,
channel: processChannel,
urls: processURLs,
file: processFile,
rss: processRSS,
Expand Down Expand Up @@ -142,20 +146,17 @@ program.action(async (options: ProcessingOptions) => {
llmServices,
transcriptServices
)
// Log success message
l(final(`\n================================================================================================`))
l(final(` ${action} Processing Completed Successfully.`))
l(final(`================================================================================================\n`))
exit(0)
} catch (error) {
// Log error and exit if processing fails
err(`Error processing ${action}:`, (error as Error).message)
exit(1)
}
}
})

// Set up error handling for unknown commands
program.on('command:*', function () {
err(`Error: Invalid command '${program.args.join(' ')}'. Use --help to see available commands.`)
exit(1)
Expand Down
147 changes: 147 additions & 0 deletions src/commands/processChannel.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// src/commands/processChannel.ts

/**
* @file Processes an entire YouTube channel, handling metadata extraction and individual video processing.
* @packageDocumentation
*/

import { writeFile } from 'node:fs/promises'
import { processVideo } from './processVideo.js'
import { l, err, opts, success, execFilePromise } from '../globals.js'
import type {
LLMServices, TranscriptServices, ProcessingOptions, VideoMetadata,
} from '../types.js'

/**
 * Maximum number of yt-dlp metadata lookups that run at the same time when `--info` is used.
 * Keeps a large channel from spawning one yt-dlp process per video all at once.
 */
const METADATA_CONCURRENCY = 5

/**
 * Extracts metadata for a single video by invoking yt-dlp.
 *
 * Failures are logged and reported as `null` so that one bad video does not
 * abort metadata collection for the rest of the channel.
 *
 * @param url - URL of the video to inspect.
 * @returns The video's metadata, or `null` if yt-dlp failed or returned incomplete output.
 */
async function extractVideoMetadata(url: string): Promise<VideoMetadata | null> {
  try {
    // Execute yt-dlp command to extract metadata
    // NOTE(review): execFilePromise comes from ../globals.js and is presumably a
    // promisified child_process.execFile — confirm against globals.
    const { stdout } = await execFilePromise('yt-dlp', [
      '--restrict-filenames',
      '--print', '%(webpage_url)s',
      '--print', '%(channel)s',
      '--print', '%(uploader_url)s',
      '--print', '%(title)s',
      '--print', '%(upload_date>%Y-%m-%d)s',
      '--print', '%(thumbnail)s',
      url,
    ])

    // Fields are read back line by line in the same order as the --print templates above.
    // Splitting on \r?\n keeps a stray carriage return out of the values on CRLF output.
    const [showLink, channel, channelURL, title, publishDate, coverImage] = stdout
      .trim()
      .split(/\r?\n/)

    // Validate that all required metadata fields are present
    if (!showLink || !channel || !channelURL || !title || !publishDate || !coverImage) {
      throw new Error('Incomplete metadata received from yt-dlp.')
    }

    return {
      showLink,
      channel,
      channelURL,
      title,
      description: '',
      publishDate,
      coverImage,
    } as VideoMetadata
  } catch (error) {
    // Log error but return null so the caller can filter out failed extractions
    err(
      `Error extracting metadata for ${url}: ${
        error instanceof Error ? error.message : String(error)
      }`
    )
    return null
  }
}

/**
 * Processes an entire YouTube channel by:
 * 1. Fetching all video URLs from the channel using yt-dlp.
 * 2. If `options.info` is set, extracting metadata for every video, writing it to
 *    `content/channel_info.json`, and returning without processing any video.
 * 3. Otherwise processing each video sequentially with `processVideo`.
 *
 * A failure on an individual video (metadata extraction or processing) is logged and
 * skipped; the remaining videos are still handled.
 *
 * Metadata extraction runs in batches of at most `METADATA_CONCURRENCY` yt-dlp
 * processes so that large channels do not exhaust system resources.
 *
 * @param options - Configuration options for processing.
 * @param channelUrl - URL of the YouTube channel to process.
 * @param llmServices - Optional language model service for transcript processing.
 * @param transcriptServices - Optional transcription service for audio conversion.
 * @returns Promise that resolves when all videos have been processed (or the info file is written).
 *
 * @remarks
 * Does not throw: if the channel listing fails, the channel has no videos, or the info
 * file cannot be written, the error is logged and the process exits with code 1.
 */
export async function processChannel(
  options: ProcessingOptions,
  channelUrl: string,
  llmServices?: LLMServices,
  transcriptServices?: TranscriptServices
): Promise<void> {
  // Log the processing parameters for debugging purposes
  l(opts('Parameters passed to processChannel:\n'))
  l(opts(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}`))

  try {
    // Extract all video URLs from the channel using yt-dlp
    const { stdout, stderr } = await execFilePromise('yt-dlp', [
      '--flat-playlist',
      '--print', 'url',
      '--no-warnings',
      channelUrl,
    ])

    // Log anything yt-dlp wrote to stderr
    if (stderr) {
      err(`yt-dlp warnings: ${stderr}`)
    }

    // Convert stdout into an array of video URLs, removing empty entries
    const urls = stdout.trim().split(/\r?\n/).filter(Boolean)

    // Exit if no videos were found in the channel
    if (urls.length === 0) {
      err('Error: No videos found in the channel.')
      process.exit(1)
    }

    l(opts(`\nFound ${urls.length} videos in the channel...`))

    // If the --info option is provided, only collect and save metadata
    if (options.info) {
      // Run lookups in bounded batches; results keep the same order as `urls`
      const metadataList: (VideoMetadata | null)[] = []
      for (let start = 0; start < urls.length; start += METADATA_CONCURRENCY) {
        const batch = urls.slice(start, start + METADATA_CONCURRENCY)
        const batchResults = await Promise.all(batch.map((url) => extractVideoMetadata(url)))
        metadataList.push(...batchResults)
      }

      // Filter out any null results due to errors
      const validMetadata = metadataList.filter(
        (metadata): metadata is VideoMetadata => metadata !== null
      )

      // Save metadata to a JSON file
      const jsonContent = JSON.stringify(validMetadata, null, 2)
      const jsonFilePath = 'content/channel_info.json'
      await writeFile(jsonFilePath, jsonContent)
      l(success(`Channel information saved to: ${jsonFilePath}`))
      return
    }

    // Process each video sequentially, with error handling for individual videos
    for (const [index, url] of urls.entries()) {
      // Visual separator for each video in the console
      l(opts(`\n================================================================================================`))
      l(opts(` Processing video ${index + 1}/${urls.length}: ${url}`))
      l(opts(`================================================================================================\n`))
      try {
        // Process the video using the existing processVideo function
        await processVideo(options, url, llmServices, transcriptServices)
      } catch (error) {
        // Log error but continue processing remaining videos
        err(
          `Error processing video ${url}: ${
            error instanceof Error ? error.message : String(error)
          }`
        )
      }
    }
  } catch (error) {
    // Handle fatal errors that prevent channel processing
    err(`Error processing channel: ${error instanceof Error ? error.message : String(error)}`)
    process.exit(1)
  }
}
Loading

0 comments on commit 1060b56

Please sign in to comment.