Skip to content

Commit

Permalink
Merge pull request #37 from ajcwebdev/bench
Browse files Browse the repository at this point in the history
Benchmark for different sized models with `whisper.cpp`, `openai-whisper`, and `whisper-diarization`
  • Loading branch information
ajcwebdev authored Oct 29, 2024
2 parents f8ceb73 + 1dd8a18 commit d73573c
Show file tree
Hide file tree
Showing 20 changed files with 1,706 additions and 668 deletions.
470 changes: 312 additions & 158 deletions docs/examples.md

Large diffs are not rendered by default.

29 changes: 16 additions & 13 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,19 @@
"docker-up": "docker compose up --build -d --remove-orphans --no-start",
"ds": "docker compose images && docker compose ls",
"prune": "docker system prune -af --volumes && docker image prune -af && docker container prune -f && docker volume prune -af",
"v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --video",
"u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --urls",
"p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --playlist",
"f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --file",
"r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --rss",
"last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v2 --last 3 --rss",
"v": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --video",
"u": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --urls",
"p": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --playlist",
"f": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --file",
"r": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --rss",
"last2": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 2 --rss",
"last3": "tsx --env-file=.env --no-warnings src/autoshow.ts --whisper large-v3-turbo --last 3 --rss",
"serve": "tsx --env-file=.env --no-warnings --watch packages/server/index.ts",
"fetch-local": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-local.ts",
"fetch-all": "tsx --env-file=.env --no-warnings packages/server/tests/fetch-all.ts",
"t": "npm run test-local",
"bench": "tsx --test test/bench.test.ts",
"test-bench": "tsx --test test/bench.test.ts",
"test-local": "tsx --test test/local.test.ts",
"test-docker": "tsx --test test/docker.test.ts",
"test-integrations": "tsx --test test/integrations.test.ts",
Expand All @@ -44,8 +47,8 @@
"deno-as": "deno run --allow-sys --allow-read --allow-run --allow-write --allow-env src/autoshow.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "0.29.0",
"@deepgram/sdk": "3.8.1",
"@anthropic-ai/sdk": "0.30.1",
"@deepgram/sdk": "3.9.0",
"@fastify/cors": "10.0.1",
"@google/generative-ai": "0.21.0",
"@mistralai/mistralai": "1.1.0",
Expand All @@ -56,17 +59,17 @@
"commander": "12.1.0",
"fast-xml-parser": "4.5.0",
"fastify": "5.0.0",
"file-type": "19.5.0",
"inquirer": "12.0.0",
"file-type": "19.6.0",
"inquirer": "12.0.1",
"node-llama-cpp": "3.1.1",
"ollama": "0.5.9",
"openai": "4.67.3"
"openai": "4.68.4"
},
"devDependencies": {
"@types/inquirer": "9.0.7",
"@types/node": "22.7.5",
"@types/node": "22.8.1",
"tsx": "4.19.1",
"typedoc": "^0.26.10",
"typedoc": "0.26.10",
"typescript": "5.6.3"
}
}
45 changes: 32 additions & 13 deletions src/autoshow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,35 +22,39 @@ import { argv, exit } from 'node:process'
import { log, opts, final, ACTION_OPTIONS, LLM_OPTIONS, TRANSCRIPT_OPTIONS } from './models.js'
import type { ProcessingOptions, HandlerFunction, LLMServices, TranscriptServices } from './types.js'

// Initialize the command-line interface
// Initialize the command-line interface using Commander.js
const program = new Command()

/**
* Defines the command-line interface options and descriptions.
* Sets up all available commands and their respective flags
*/
program
.name('autoshow')
.version('0.0.1')
.description('Automate processing of audio and video content from various sources.')
.usage('[options]')
.option('--prompt <sections...>', 'Specify prompt sections to include')
// Input source options
.option('-v, --video <url>', 'Process a single YouTube video')
.option('-p, --playlist <playlistUrl>', 'Process all videos in a YouTube playlist')
.option('-u, --urls <filePath>', 'Process YouTube videos from a list of URLs in a file')
.option('-f, --file <filePath>', 'Process a local audio or video file')
.option('-r, --rss <rssURL>', 'Process a podcast RSS feed')
// RSS feed specific options
.option('--item <itemUrls...>', 'Process specific items in the RSS feed by providing their audio URLs')
.option('--order <order>', 'Specify the order for RSS feed processing (newest or oldest)')
.option('--skip <number>', 'Number of items to skip when processing RSS feed', parseInt)
.option('--last <number>', 'Number of most recent items to process (overrides --order and --skip)', parseInt)
.option('--info', 'Generate JSON file with RSS feed information instead of processing items')
// Transcription service options
.option('--whisper [model]', 'Use Whisper.cpp for transcription with optional model specification')
.option('--whisperDocker [model]', 'Use Whisper.cpp in Docker for transcription with optional model specification')
.option('--whisperPython [model]', 'Use openai-whisper for transcription with optional model specification')
.option('--whisperDiarization [model]', 'Use whisper-diarization for transcription with optional model specification')
.option('--deepgram', 'Use Deepgram for transcription')
.option('--assembly', 'Use AssemblyAI for transcription')
.option('--speakerLabels', 'Use speaker labels for AssemblyAI transcription')
// LLM service options
.option('--chatgpt [model]', 'Use ChatGPT for processing with optional model specification')
.option('--claude [model]', 'Use Claude for processing with optional model specification')
.option('--cohere [model]', 'Use Cohere for processing with optional model specification')
Expand All @@ -62,6 +66,8 @@ program
.option('--llama [model]', 'Use Node Llama for processing with optional model specification')
.option('--ollama [model]', 'Use Ollama for processing with optional model specification')
.option('--gemini [model]', 'Use Gemini for processing with optional model specification')
// Utility options
.option('--prompt <sections...>', 'Specify prompt sections to include')
.option('--noCleanUp', 'Do not delete intermediary files after processing')
.option('-i, --interactive', 'Run in interactive mode')
.addHelpText(
Expand All @@ -80,6 +86,8 @@ Report Issues: https://github.com/ajcwebdev/autoshow/issues

/**
* Helper function to validate that only one option from a list is provided.
* Prevents users from specifying multiple conflicting options simultaneously.
*
* @param optionKeys - The list of option keys to check.
* @param options - The options object.
* @param errorMessage - The prefix of the error message.
Expand All @@ -90,7 +98,10 @@ function getSingleOption(
options: ProcessingOptions,
errorMessage: string
): string | undefined {
// Filter out which options from the provided list are actually set
const selectedOptions = optionKeys.filter((opt) => options[opt as keyof ProcessingOptions])

// If more than one option is selected, throw an error
if (selectedOptions.length > 1) {
console.error(`Error: Multiple ${errorMessage} provided (${selectedOptions.join(', ')}). Please specify only one.`)
exit(1)
Expand All @@ -100,13 +111,17 @@ function getSingleOption(

/**
* Main action for the program.
* Handles the processing of options and executes the appropriate command handler.
*
* @param options - The command-line options provided by the user.
*/
program.action(async (options: ProcessingOptions) => {
// Log received options for debugging purposes
log(opts(`Options received at beginning of command:\n`))
log(options)
log(``)

// Define mapping of action types to their handler functions
const PROCESS_HANDLERS: Record<string, HandlerFunction> = {
video: processVideo,
playlist: processPlaylist,
Expand All @@ -115,61 +130,65 @@ program.action(async (options: ProcessingOptions) => {
rss: processRSS,
}

// Extract interactive mode flag
const { interactive } = options

// Check if no action option was provided
const noActionProvided = ACTION_OPTIONS.every((opt) => !options[opt as keyof ProcessingOptions])

// If in interactive mode or no action provided, prompt user for input
if (interactive || noActionProvided) {
options = await handleInteractivePrompt(options)
}

// Ensure options.item is an array if provided via command line
// Ensure options.item is always an array if provided via command line
if (options.item && !Array.isArray(options.item)) {
options.item = [options.item]
}

// Validate and retrieve single action option
// Validate and get single options for action, LLM, and transcription
const action = getSingleOption(ACTION_OPTIONS, options, 'input option')

// Validate and retrieve single LLM option
const llmKey = getSingleOption(LLM_OPTIONS, options, 'LLM option')
const llmServices = llmKey as LLMServices | undefined

// Validate and retrieve single transcription option
const transcriptKey = getSingleOption(TRANSCRIPT_OPTIONS, options, 'transcription option')
const transcriptServices: TranscriptServices | undefined = transcriptKey as TranscriptServices | undefined

// Set default transcription service if not provided
// Set default transcription service to whisper if none provided
const finalTranscriptServices: TranscriptServices = transcriptServices || 'whisper'

// Set default Whisper model if not provided
// Set the default Whisper model to 'large-v3-turbo' if whisper is selected but no model is specified
if (finalTranscriptServices === 'whisper' && !options.whisper) {
options.whisper = 'base'
options.whisper = 'large-v3-turbo'
}

// Execute the appropriate handler if an action was specified
if (action) {
try {
// Process the content using the selected handler
await PROCESS_HANDLERS[action](
options,
options[action as keyof ProcessingOptions] as string,
llmServices,
finalTranscriptServices
)
// Log success message
log(final(`\n================================================================================================`))
log(final(` ${action} Processing Completed Successfully.`))
log(final(`================================================================================================\n`))
exit(0)
} catch (error) {
// Log error and exit if processing fails
console.error(`Error processing ${action}:`, (error as Error).message)
exit(1)
}
}
})

// Handle unknown commands
// Set up error handling for unknown commands
program.on('command:*', function () {
console.error(`Error: Invalid command '${program.args.join(' ')}'. Use --help to see available commands.`)
exit(1)
})

// Parse the command-line arguments
// Parse the command-line arguments and execute the program
program.parse(argv)
51 changes: 39 additions & 12 deletions src/commands/processFile.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
// src/commands/processFile.ts

/**
* @file Process a local audio or video file for transcription and analysis.
* @packageDocumentation
*/

import { generateMarkdown } from '../utils/generateMarkdown.js'
import { downloadAudio } from '../utils/downloadAudio.js'
import { runTranscription } from '../utils/runTranscription.js'
Expand All @@ -9,31 +14,53 @@ import { log, opts, wait } from '../models.js'
import type { LLMServices, TranscriptServices, ProcessingOptions } from '../types.js'

/**
* Main function to process a local audio or video file.
* @param {string} filePath - The path to the local file to process.
* @param {LLMServices} [llmServices] - The selected Language Model option.
* @param {TranscriptServices} [transcriptServices] - The transcription service to use.
* @param {ProcessingOptions} options - Additional options for processing.
* @returns {Promise<void>}
* Processes a local audio or video file through a series of operations:
* 1. Generates markdown with file metadata
* 2. Converts the file to the required audio format
* 3. Transcribes the audio content
* 4. Processes the transcript with a language model (if specified)
* 5. Cleans up temporary files (unless disabled)
*
* Unlike processVideo, this function handles local files and doesn't need
* to check for external dependencies like yt-dlp.
*
* @param options - Configuration options for processing
* @param filePath - Path to the local audio or video file to process
* @param llmServices - Optional language model service to use for processing the transcript
* @param transcriptServices - Optional transcription service to use for converting audio to text
* @remarks Does not rethrow: if any processing step fails, the error is logged and the process is terminated with exit code 1
* @returns Promise that resolves when all processing is complete
*/
export async function processFile(
options: ProcessingOptions,
filePath: string,
llmServices?: LLMServices,
transcriptServices?: TranscriptServices
): Promise<void> {
// Log the processing parameters for debugging purposes
log(opts('Parameters passed to processFile:\n'))
log(wait(` - llmServices: ${llmServices}\n - transcriptServices: ${transcriptServices}\n`))

try {
const { frontMatter, finalPath, filename } = await generateMarkdown(options, filePath) // Generate markdown for the file
await downloadAudio(options, filePath, filename) // Convert the audio or video file to the required format
await runTranscription(options, finalPath, frontMatter, transcriptServices) // Run transcription on the file
await runLLM(options, finalPath, frontMatter, llmServices) // Process the transcript with the selected Language Model
if (!options.noCleanUp) { // Clean up temporary files if the noCleanUp option is not set
// Generate markdown file with file metadata and get file paths
const { frontMatter, finalPath, filename } = await generateMarkdown(options, filePath)

// Convert the input file to the required audio format for processing
await downloadAudio(options, filePath, filename)

// Convert the audio to text using the specified transcription service
await runTranscription(options, finalPath, frontMatter, transcriptServices)

// Process the transcript with a language model if one was specified
await runLLM(options, finalPath, frontMatter, llmServices)

// Remove temporary files unless the noCleanUp option is set
if (!options.noCleanUp) {
await cleanUpFiles(finalPath)
}
} catch (error) {
// Log the error and terminate the process with error code
console.error(`Error processing file: ${(error as Error).message}`)
process.exit(1) // Exit with an error code
process.exit(1)
}
}
Loading

0 comments on commit d73573c

Please sign in to comment.